Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8328,10 +8328,12 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
*Plan, CM.getMinimalBitwidths());
VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
// TODO: try to put it close to addActiveLaneMask().
if (CM.foldTailWithEVL())
// TODO: try to put addExplicitVectorLength close to addActiveLaneMask
if (CM.foldTailWithEVL()) {
VPlanTransforms::runPass(VPlanTransforms::addExplicitVectorLength,
*Plan, CM.getMaxSafeElements());
VPlanTransforms::runPass(VPlanTransforms::optimizeEVLMasks, *Plan);
}
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
VPlans.push_back(std::move(Plan));
}
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,11 @@ struct SpecificCmp_match {
: Predicate(Pred), Op0(LHS), Op1(RHS) {}

/// Match \p V by delegating to the recipe-based overload on the recipe that
/// defines it. A value with no defining recipe never matches.
bool match(const VPValue *V) const {
  if (auto *Def = V->getDefiningRecipe())
    return match(Def);
  return false;
}

bool match(const VPRecipeBase *V) const {
CmpPredicate CurrentPred;
return Cmp_match<Op0_t, Op1_t, Opcodes...>(CurrentPred, Op0, Op1)
.match(V) &&
Expand Down
107 changes: 58 additions & 49 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2979,8 +2979,47 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
return nullptr;
}

/// Replace recipes with their EVL variants.
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
/// Optimize away any EVL-based header masks to VP intrinsic based recipes.
/// The transforms here need to preserve the original semantics.
void VPlanTransforms::optimizeEVLMasks(VPlan &Plan) {
// Find the EVL-based header mask if it exists: icmp ult step-vector, EVL
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can the code use m_SpecificICmp to look for icmp ult?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in e5dbe66, but I had to add a specific VPRecipeBase overload for SpecificCmp_match so we can directly call match on VPRecipeBase. Regular Cmp_match does the same thing

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great, that's useful in any case.

VPValue *HeaderMask = nullptr, *EVL = nullptr;
for (VPRecipeBase &R : *Plan.getVectorLoopRegion()->getEntryBasicBlock()) {
if (match(&R, m_SpecificICmp(CmpInst::ICMP_ULT, m_StepVector(),
m_VPValue(EVL))) &&
match(EVL, m_EVL(m_VPValue()))) {
HeaderMask = R.getVPSingleValue();
break;
}
}
if (!HeaderMask)
return;

VPTypeAnalysis TypeInfo(Plan);
SmallVector<VPRecipeBase *> OldRecipes;
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
VPRecipeBase *R = cast<VPRecipeBase>(U);
if (auto *NewR = optimizeMaskToEVL(HeaderMask, *R, TypeInfo, *EVL)) {
NewR->insertBefore(R);
for (auto [Old, New] :
zip_equal(R->definedValues(), NewR->definedValues()))
Old->replaceAllUsesWith(New);
OldRecipes.push_back(R);
}
}
// Erase old recipes at the end so we don't invalidate TypeInfo.
for (VPRecipeBase *R : reverse(OldRecipes)) {
SmallVector<VPValue *> PossiblyDead(R->operands());
R->eraseFromParent();
for (VPValue *Op : PossiblyDead)
recursivelyDeleteDeadRecipes(Op);
}
}

/// After replacing the canonical IV with an EVL-based IV, fix up recipes that use
/// VF to use the EVL instead to avoid incorrect updates on the penultimate
/// iteration.
static void fixupVFUsersForEVL(VPlan &Plan, VPValue &EVL) {
VPTypeAnalysis TypeInfo(Plan);
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
Expand Down Expand Up @@ -3008,10 +3047,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
return isa<VPWidenPointerInductionRecipe>(U);
});

// Defer erasing recipes till the end so that we don't invalidate the
// VPTypeAnalysis cache.
SmallVector<VPRecipeBase *> ToErase;

// Create a scalar phi to track the previous EVL if fixed-order recurrence is
// contained.
bool ContainsFORs =
Expand Down Expand Up @@ -3046,7 +3081,6 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
R.getDebugLoc());
VPSplice->insertBefore(&R);
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
ToErase.push_back(&R);
}
}
}
Expand All @@ -3067,49 +3101,23 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
CmpInst::ICMP_ULT,
Builder.createNaryOp(VPInstruction::StepVector, {}, EVLType), &EVL);
HeaderMask->replaceAllUsesWith(EVLMask);
ToErase.push_back(HeaderMask->getDefiningRecipe());

// Try to optimize header mask recipes away to their EVL variants.
// TODO: Split optimizeMaskToEVL out and move into
// VPlanTransforms::optimize. transformRecipestoEVLRecipes should be run in
// tryToBuildVPlanWithVPRecipes beforehand.
for (VPUser *U : collectUsersRecursively(EVLMask)) {
auto *CurRecipe = cast<VPRecipeBase>(U);
VPRecipeBase *EVLRecipe =
optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, EVL);
if (!EVLRecipe)
continue;

unsigned NumDefVal = EVLRecipe->getNumDefinedValues();
assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
"New recipe must define the same number of values as the "
"original.");
EVLRecipe->insertBefore(CurRecipe);
if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe, VPInterleaveEVLRecipe>(
EVLRecipe)) {
for (unsigned I = 0; I < NumDefVal; ++I) {
VPValue *CurVPV = CurRecipe->getVPValue(I);
CurVPV->replaceAllUsesWith(EVLRecipe->getVPValue(I));
}
}
ToErase.push_back(CurRecipe);
}
// Remove dead EVL mask.
if (EVLMask->getNumUsers() == 0)
ToErase.push_back(EVLMask->getDefiningRecipe());

for (VPRecipeBase *R : reverse(ToErase)) {
SmallVector<VPValue *> PossiblyDead(R->operands());
R->eraseFromParent();
for (VPValue *Op : PossiblyDead)
recursivelyDeleteDeadRecipes(Op);
}
}

/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
/// replaces all uses except the canonical IV increment of
/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
/// is used only for loop iterations counting after this transformation.
/// Converts a tail folded vector loop region to step by
/// VPInstruction::ExplicitVectorLength elements instead of VF elements each
/// iteration.
///
/// - Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
/// replace all uses except the canonical IV increment of
/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe.
/// VPCanonicalIVPHIRecipe is used only for loop iterations counting after
/// this transformation.
///
/// - The header mask is replaced with a header mask based on the EVL.
///
/// - Plans with FORs have a new phi added to keep track of the EVL of the
/// previous iteration, and VPFirstOrderRecurrencePHIRecipes are replaced with
/// @llvm.vp.splice.
///
/// The function uses the following definitions:
/// %StartV is the canonical induction start value.
Expand Down Expand Up @@ -3201,7 +3209,8 @@ void VPlanTransforms::addExplicitVectorLength(
DebugLoc::getCompilerGenerated(), "avl.next");
AVLPhi->addOperand(NextAVL);

transformRecipestoEVLRecipes(Plan, *VPEVL);
fixupVFUsersForEVL(Plan, *VPEVL);
removeDeadRecipes(Plan);

// Replace all uses of VPCanonicalIVPHIRecipe by
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,15 @@ struct VPlanTransforms {
addExplicitVectorLength(VPlan &Plan,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be a good opportunity to clarify the documentation here; perhaps worth mentioning that this replaces the header mask.

/// replaces all uses except the canonical IV increment of
/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe.

it may also have to introduce a new phi to track the EVL of the previous iteration

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clarified the documentation in e550c42

const std::optional<unsigned> &MaxEVLSafeElements);

/// Optimize recipes which use an EVL-based header mask to VP intrinsics, for
/// example:
///
/// %mask = icmp ult step-vector, EVL
/// %load = load %ptr, %mask
/// -->
/// %load = vp.load %ptr, EVL
static void optimizeEVLMasks(VPlan &Plan);

// For each Interleave Group in \p InterleaveGroups replace the Recipes
// widening its memory instructions with a single VPInterleaveRecipe at its
// insertion point.
Expand Down
Loading