Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 1 addition & 11 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,10 +248,6 @@ enum class TailFoldingStyle {
/// active.lane.mask to calculate the mask for the next iteration. If the
/// increment overflows, the mask is no longer correct.
DataAndControlFlow,
/// Use predicate to control both data and control flow, but modify
/// the trip count so that a runtime overflow check can be avoided
/// and such that the scalar epilogue loop can always be removed.
DataAndControlFlowWithoutRuntimeCheck,
/// Use predicated EVL instructions for tail-folding.
/// Indicates that VP intrinsics should be used.
DataWithEVL,
Expand Down Expand Up @@ -754,13 +750,7 @@ class TargetTransformInfo {
LLVM_ABI bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const;

/// Query the target what the preferred style of tail folding is.
/// \param IVUpdateMayOverflow Tells whether it is known if the IV update
/// may (or will never) overflow for the suggested VF/UF in the given loop.
/// Targets can use this information to select a more optimal tail folding
/// style. The value conservatively defaults to true, such that no assumptions
/// are made on overflow.
LLVM_ABI TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
LLVM_ABI TailFoldingStyle getPreferredTailFoldingStyle() const;

// Parameters that control the loop peeling transformation
struct PeelingPreferences {
Expand Down
3 changes: 1 addition & 2 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,7 @@ class TargetTransformInfoImplBase {
return false;
}

virtual TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
virtual TailFoldingStyle getPreferredTailFoldingStyle() const {
return TailFoldingStyle::DataWithoutLaneMask;
}

Expand Down
5 changes: 2 additions & 3 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -801,9 +801,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return BaseT::preferPredicateOverEpilogue(TFI);
}

TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const override {
return BaseT::getPreferredTailFoldingStyle(IVUpdateMayOverflow);
TailFoldingStyle getPreferredTailFoldingStyle() const override {
return BaseT::getPreferredTailFoldingStyle();
}

std::optional<Instruction *>
Expand Down
5 changes: 2 additions & 3 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -387,9 +387,8 @@ bool TargetTransformInfo::preferPredicateOverEpilogue(
return TTIImpl->preferPredicateOverEpilogue(TFI);
}

TailFoldingStyle TargetTransformInfo::getPreferredTailFoldingStyle(
bool IVUpdateMayOverflow) const {
return TTIImpl->getPreferredTailFoldingStyle(IVUpdateMayOverflow);
TailFoldingStyle TargetTransformInfo::getPreferredTailFoldingStyle() const {
return TTIImpl->getPreferredTailFoldingStyle();
}

std::optional<Instruction *>
Expand Down
11 changes: 3 additions & 8 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,14 +464,9 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
return ST->hasSVE() ? 5 : 0;
}

TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
if (ST->hasSVE())
return IVUpdateMayOverflow
? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
: TailFoldingStyle::DataAndControlFlow;

return TailFoldingStyle::DataWithoutLaneMask;
TailFoldingStyle getPreferredTailFoldingStyle() const override {
return ST->hasSVE() ? TailFoldingStyle::DataAndControlFlow
: TailFoldingStyle::DataWithoutLaneMask;
}

bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override;
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2670,8 +2670,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) const {
*LVL->getDominatorTree());
}

TailFoldingStyle
ARMTTIImpl::getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
TailFoldingStyle ARMTTIImpl::getPreferredTailFoldingStyle() const {
if (!ST->hasMVEIntegerOps() || !EnableTailPredication)
return TailFoldingStyle::DataWithoutLaneMask;

Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -432,8 +432,7 @@ class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) const override;

TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const override;
TailFoldingStyle getPreferredTailFoldingStyle() const override;

void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) const override;
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,7 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
/// Answer the predicate-over-epilogue preference query for this target.
/// The result is simply whatever ST->hasVInstructions() reports; \p TFI is
/// accepted to satisfy the overridden interface but is not inspected here.
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override {
return ST->hasVInstructions();
}
TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
TailFoldingStyle getPreferredTailFoldingStyle() const override {
return ST->hasVInstructions() ? TailFoldingStyle::DataWithEVL
: TailFoldingStyle::None;
}
Expand Down
62 changes: 20 additions & 42 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,6 @@ static cl::opt<TailFoldingStyle> ForceTailFoldingStyle(
clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control",
"Create lane mask using active.lane.mask intrinsic, and use "
"it for both data and control flow"),
clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck,
"data-and-control-without-rt-check",
"Similar to data-and-control, but remove the runtime check"),
clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl",
"Use predicated EVL instructions for tail folding. If EVL "
"is unsupported, fallback to data-without-lane-mask.")));
Expand Down Expand Up @@ -1327,34 +1324,27 @@ class LoopVectorizationCostModel {
}

/// Returns the TailFoldingStyle that is best for the current loop.
TailFoldingStyle getTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
if (!ChosenTailFoldingStyle)
return TailFoldingStyle::None;
return IVUpdateMayOverflow ? ChosenTailFoldingStyle->first
: ChosenTailFoldingStyle->second;
TailFoldingStyle getTailFoldingStyle() const {
return ChosenTailFoldingStyle;
}

/// Selects and saves TailFoldingStyle for 2 options - if IV update may
/// overflow or not.
/// Selects and saves TailFoldingStyle.
/// \param IsScalableVF true if scalable vector factors enabled.
/// \param UserIC User specific interleave count.
void setTailFoldingStyles(bool IsScalableVF, unsigned UserIC) {
assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet.");
void setTailFoldingStyle(bool IsScalableVF, unsigned UserIC) {
assert(ChosenTailFoldingStyle == TailFoldingStyle::None &&
"Tail folding must not be selected yet.");
if (!Legal->canFoldTailByMasking()) {
ChosenTailFoldingStyle = {TailFoldingStyle::None, TailFoldingStyle::None};
ChosenTailFoldingStyle = TailFoldingStyle::None;
return;
}

// Default to TTI preference, but allow command line override.
ChosenTailFoldingStyle = {
TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true),
TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false)};
ChosenTailFoldingStyle = TTI.getPreferredTailFoldingStyle();
if (ForceTailFoldingStyle.getNumOccurrences())
ChosenTailFoldingStyle = {ForceTailFoldingStyle.getValue(),
ForceTailFoldingStyle.getValue()};
ChosenTailFoldingStyle = ForceTailFoldingStyle.getValue();

if (ChosenTailFoldingStyle->first != TailFoldingStyle::DataWithEVL &&
ChosenTailFoldingStyle->second != TailFoldingStyle::DataWithEVL)
if (ChosenTailFoldingStyle != TailFoldingStyle::DataWithEVL)
return;
// Override EVL styles if needed.
// FIXME: Investigate opportunity for fixed vector factor.
Expand All @@ -1366,10 +1356,9 @@ class LoopVectorizationCostModel {
// if it's allowed, or DataWithoutLaneMask otherwise.
if (ScalarEpilogueStatus == CM_ScalarEpilogueAllowed ||
ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate)
ChosenTailFoldingStyle = {TailFoldingStyle::None, TailFoldingStyle::None};
ChosenTailFoldingStyle = TailFoldingStyle::None;
else
ChosenTailFoldingStyle = {TailFoldingStyle::DataWithoutLaneMask,
TailFoldingStyle::DataWithoutLaneMask};
ChosenTailFoldingStyle = TailFoldingStyle::DataWithoutLaneMask;

LLVM_DEBUG(
dbgs() << "LV: Preference for VP intrinsics indicated. Will "
Expand All @@ -1381,8 +1370,6 @@ class LoopVectorizationCostModel {

/// Returns true if all loop blocks should be masked to fold tail loop.
bool foldTailByMasking() const {
// TODO: check if it is possible to check for None style independent of
// IVUpdateMayOverflow flag in getTailFoldingStyle.
return getTailFoldingStyle() != TailFoldingStyle::None;
}

Expand All @@ -1392,9 +1379,7 @@ class LoopVectorizationCostModel {
if (!EnableWideActiveLaneMask)
return false;

TailFoldingStyle TF = getTailFoldingStyle();
return TF == TailFoldingStyle::DataAndControlFlow ||
TF == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
return getTailFoldingStyle() == TailFoldingStyle::DataAndControlFlow;
}

/// Return maximum safe number of elements to be processed per vector
Expand Down Expand Up @@ -1606,10 +1591,8 @@ class LoopVectorizationCostModel {
/// iterations to execute in the scalar loop.
ScalarEpilogueLowering ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;

/// Control finally chosen tail folding style. The first element is used if
/// the IV update may overflow, the second element - if it does not.
std::optional<std::pair<TailFoldingStyle, TailFoldingStyle>>
ChosenTailFoldingStyle;
/// Control finally chosen tail folding style.
TailFoldingStyle ChosenTailFoldingStyle = TailFoldingStyle::None;

/// true if scalable vectorization is supported and enabled.
std::optional<bool> IsScalableVectorizationAllowed;
Expand Down Expand Up @@ -2098,13 +2081,11 @@ class GeneratedRTChecks {

static bool useActiveLaneMask(TailFoldingStyle Style) {
return Style == TailFoldingStyle::Data ||
Style == TailFoldingStyle::DataAndControlFlow ||
Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
Style == TailFoldingStyle::DataAndControlFlow;
}

static bool useActiveLaneMaskForControlFlow(TailFoldingStyle Style) {
return Style == TailFoldingStyle::DataAndControlFlow ||
Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
return Style == TailFoldingStyle::DataAndControlFlow;
}

// Return true if \p OuterLp is an outer loop annotated with hints for explicit
Expand Down Expand Up @@ -3727,7 +3708,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
// by masking.
// FIXME: look for a smaller MaxVF that does divide TC rather than masking.
bool ContainsScalableVF = MaxFactors.ScalableVF.isNonZero();
setTailFoldingStyles(ContainsScalableVF, UserIC);
setTailFoldingStyle(ContainsScalableVF, UserIC);
if (foldTailByMasking()) {
if (foldTailWithEVL()) {
LLVM_DEBUG(
Expand Down Expand Up @@ -8203,7 +8184,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
for (ElementCount VF : Range)
IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF);

TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow);
TailFoldingStyle Style = CM.getTailFoldingStyle();
// Use NUW for the induction increment if we proved that it won't overflow in
// the vector loop or when not folding the tail. In the latter case, we know
// that the canonical induction increment will not overflow as the vector trip
Expand Down Expand Up @@ -8415,10 +8396,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// TODO: Move checks to VPlanTransforms::addActiveLaneMask once
// TailFoldingStyle is visible there.
bool ForControlFlow = useActiveLaneMaskForControlFlow(Style);
bool WithoutRuntimeCheck =
Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow,
WithoutRuntimeCheck);
VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow);
}
VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues, PSE);

Expand Down
59 changes: 13 additions & 46 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2825,42 +2825,31 @@ void VPlanTransforms::optimize(VPlan &Plan) {
// dropped from the canonical IV increment. Return the created
// VPActiveLaneMaskPHIRecipe.
//
// The function uses the following definitions:
//
// %TripCount = DataWithControlFlowWithoutRuntimeCheck ?
// calculate-trip-count-minus-VF (original TC) : original TC
// %IncrementValue = DataWithControlFlowWithoutRuntimeCheck ?
// CanonicalIVPhi : CanonicalIVIncrement
// %StartV is the canonical induction start value.
//
// The function adds the following recipes:
//
// vector.ph:
// %TripCount = calculate-trip-count-minus-VF (original TC)
// [if DataWithControlFlowWithoutRuntimeCheck]
// %EntryInc = canonical-iv-increment-for-part %StartV
// %EntryALM = active-lane-mask %EntryInc, %TripCount
// %EntryInc = canonical-iv-increment-for-part CanonicalIVStart
// %EntryALM = active-lane-mask %EntryInc, TC
//
// vector.body:
// ...
// %P = active-lane-mask-phi [ %EntryALM, %vector.ph ], [ %ALM, %vector.body ]
// ...
// %InLoopInc = canonical-iv-increment-for-part %IncrementValue
// %ALM = active-lane-mask %InLoopInc, TripCount
// %InLoopInc = canonical-iv-increment-for-part CanonicalIVIncrement
// %ALM = active-lane-mask %InLoopInc, TC
// %Negated = Not %ALM
// branch-on-cond %Negated
//
static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {
static VPActiveLaneMaskPHIRecipe *
addVPLaneMaskPhiAndUpdateExitBranch(VPlan &Plan) {
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
auto *CanonicalIVPHI = TopRegion->getCanonicalIV();
VPValue *StartV = CanonicalIVPHI->getStartValue();

auto *CanonicalIVIncrement =
cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
// TODO: Check if dropping the flags is needed if
// !DataAndControlFlowWithoutRuntimeCheck.
// TODO: Check if dropping the flags is needed.
CanonicalIVIncrement->dropPoisonGeneratingFlags();
DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
// We can't use StartV directly in the ActiveLaneMask VPInstruction, since
Expand All @@ -2871,24 +2860,8 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(

// Create the ActiveLaneMask instruction using the correct start values.
VPValue *TC = Plan.getTripCount();
VPValue *VFxUF = &Plan.getVFxUF();
VPValue *VF = &Plan.getVF();

VPValue *TripCount, *IncrementValue;
if (!DataAndControlFlowWithoutRuntimeCheck) {
// When the loop is guarded by a runtime overflow check for the loop
// induction variable increment by VF, we can increment the value before
// the get.active.lane mask and use the unmodified tripcount.
IncrementValue = CanonicalIVIncrement;
TripCount = TC;
} else {
// When avoiding a runtime check, the active.lane.mask inside the loop
// uses a modified trip count and the induction variable increment is
// done after the active.lane.mask intrinsic is called.
IncrementValue = CanonicalIVPHI;
TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF,
{TC, VFxUF}, DL);
}
auto *EntryIncrement = Builder.createOverflowingOp(
VPInstruction::CanonicalIVIncrementForPart, {StartV, VF}, {false, false},
DL, "index.part.next");
Expand All @@ -2912,10 +2885,10 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
Builder.setInsertPoint(OriginalTerminator);
auto *InLoopIncrement = Builder.createOverflowingOp(
VPInstruction::CanonicalIVIncrementForPart,
{IncrementValue, &Plan.getVF()}, {false, false}, DL);
{CanonicalIVIncrement, &Plan.getVF()}, {false, false}, DL);
auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
{InLoopIncrement, TripCount, ALMMultiplier},
DL, "active.lane.mask.next");
{InLoopIncrement, TC, ALMMultiplier}, DL,
"active.lane.mask.next");
LaneMaskPhi->addOperand(ALM);

// Replace the original terminator with BranchOnCond. We have to invert the
Expand All @@ -2926,14 +2899,8 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
return LaneMaskPhi;
}

void VPlanTransforms::addActiveLaneMask(
VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
bool DataAndControlFlowWithoutRuntimeCheck) {
assert((!DataAndControlFlowWithoutRuntimeCheck ||
UseActiveLaneMaskForControlFlow) &&
"DataAndControlFlowWithoutRuntimeCheck implies "
"UseActiveLaneMaskForControlFlow");

void VPlanTransforms::addActiveLaneMask(VPlan &Plan,
bool UseActiveLaneMaskForControlFlow) {
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
auto *FoundWidenCanonicalIVUser = find_if(
LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>);
Expand All @@ -2944,7 +2911,7 @@ void VPlanTransforms::addActiveLaneMask(
cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
VPSingleDefRecipe *LaneMask;
if (UseActiveLaneMaskForControlFlow) {
LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(Plan, false);
LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(Plan);
} else {
VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
VPValue *ALMMultiplier =
Expand Down
9 changes: 2 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,14 +253,9 @@ struct VPlanTransforms {
/// Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an
/// (active-lane-mask recipe, wide canonical IV, trip-count). If \p
/// UseActiveLaneMaskForControlFlow is true, introduce a
/// VPActiveLaneMaskPHIRecipe. If \p DataAndControlFlowWithoutRuntimeCheck is
/// true, no minimum-iteration runtime check will be created (during skeleton
/// creation) and instead it is handled using active-lane-mask. \p
/// DataAndControlFlowWithoutRuntimeCheck implies \p
/// UseActiveLaneMaskForControlFlow.
/// VPActiveLaneMaskPHIRecipe.
static void addActiveLaneMask(VPlan &Plan,
bool UseActiveLaneMaskForControlFlow,
bool DataAndControlFlowWithoutRuntimeCheck);
bool UseActiveLaneMaskForControlFlow);

/// Insert truncates and extends for any truncated recipe. Redundant casts
/// will be folded later.
Expand Down