[VPlan] Split VPWidenMemoryInstructionRecipe (NFCI). (#87411)
This patch introduces a new VPWidenMemoryRecipe base class and distinct
sub-classes to model loads and stores.

This is a first step in an effort to simplify and modularize code
generation for widened loads and stores, and to enable adding further,
more specialized memory recipes.

PR: #87411
fhahn authored Apr 17, 2024
1 parent cbe148b commit a9bafe9
Showing 13 changed files with 249 additions and 223 deletions.
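
For orientation before the diffs: below is a rough sketch of how the split hierarchy fits together, inferred only from the constructor calls and accessors exercised in the changes that follow. The actual declarations live in llvm/lib/Transforms/Vectorize/VPlan.h, which is not part of this excerpt, so the exact base classes, member names, and signatures shown here are assumptions, not the committed code.

// Sketch only: inferred from the calls in the diffs below
// (VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse, DL),
// VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, ...),
// getStoredValue(), isConsecutive(), isReverse(), getAddr(), getMask(),
// State.set(this, NewLI, Part)). Real declarations are in VPlan.h.

// Common state shared by widened loads and stores.
class VPWidenMemoryRecipe : public VPRecipeBase {
protected:
  Instruction &Ingredient; // the scalar load or store being widened
  bool Consecutive;        // lanes access consecutive memory locations
  bool Reverse;            // consecutive, but with decreasing addresses

public:
  bool isConsecutive() const { return Consecutive; }
  bool isReverse() const { return Reverse; }
  VPValue *getAddr() const; // address operand
  VPValue *getMask() const; // mask operand, or nullptr if unmasked
};

// A widened load defines a value, so the recipe itself acts as the VPValue
// (hence State.set(this, NewLI, Part) in the new execute()).
class VPWidenLoadRecipe : public VPWidenMemoryRecipe, public VPValue {
public:
  VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                    bool Consecutive, bool Reverse, DebugLoc DL);
  void execute(VPTransformState &State) override;
};

// A widened store defines no value but carries the stored value as an
// additional operand.
class VPWidenStoreRecipe : public VPWidenMemoryRecipe {
public:
  VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredValue,
                     VPValue *Mask, bool Consecutive, bool Reverse,
                     DebugLoc DL);
  VPValue *getStoredValue() const;
  void execute(VPTransformState &State) override;
};

The design point visible in the diff is that loads and stores no longer share one recipe with conditional logic: a load produces a result and is set directly as the recipe's value, while a store carries the stored value as an extra operand and produces nothing.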
191 changes: 90 additions & 101 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -545,11 +545,6 @@ class InnerLoopVectorizer {
// Return true if any runtime check is added.
bool areSafetyChecksAdded() { return AddedSafetyChecks; }

/// A type for vectorized values in the new loop. Each value from the
/// original loop, when vectorized, is represented by UF vector values in the
/// new unrolled loop, where UF is the unroll factor.
using VectorParts = SmallVector<Value *, 2>;

/// A helper function to scalarize a single Instruction in the innermost loop.
/// Generates a sequence of scalar instances for each lane between \p MinLane
/// and \p MaxLane, times each part between \p MinPart and \p MaxPart,
@@ -8086,7 +8081,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
BlockMaskCache[BB] = BlockMask;
}

VPWidenMemoryInstructionRecipe *
VPWidenMemoryRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@@ -8131,12 +8126,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
Ptr = VectorPtr;
}
if (LoadInst *Load = dyn_cast<LoadInst>(I))
return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
Reverse, I->getDebugLoc());
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
I->getDebugLoc());

StoreInst *Store = cast<StoreInst>(I);
return new VPWidenMemoryInstructionRecipe(
*Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc());
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
Reverse, I->getDebugLoc());
}

/// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
@@ -8775,13 +8770,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// for this VPlan, replace the Recipes widening its memory instructions with a
// single VPInterleaveRecipe at its insertion point.
for (const auto *IG : InterleaveGroups) {
auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
RecipeBuilder.getRecipe(IG->getInsertPos()));
auto *Recipe =
cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos()));
SmallVector<VPValue *, 4> StoredValues;
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
auto *StoreR =
cast<VPWidenMemoryInstructionRecipe>(RecipeBuilder.getRecipe(SI));
auto *StoreR = cast<VPWidenStoreRecipe>(RecipeBuilder.getRecipe(SI));
StoredValues.push_back(StoreR->getStoredValue());
}

@@ -9368,92 +9362,27 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
return Call;
}

void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;

// Attempt to issue a wide load.
LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);

assert((LI || SI) && "Invalid Load/Store instruction");
assert((!SI || StoredValue) && "No stored value provided for widened store");
assert((!LI || !StoredValue) && "Stored value provided for widened load");
void VPWidenLoadRecipe::execute(VPTransformState &State) {
auto *LI = cast<LoadInst>(&Ingredient);

Type *ScalarDataTy = getLoadStoreType(&Ingredient);

auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
const Align Alignment = getLoadStoreAlignment(&Ingredient);
bool CreateGatherScatter = !isConsecutive();
bool CreateGather = !isConsecutive();

auto &Builder = State.Builder;
InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
bool isMaskRequired = getMask();
if (isMaskRequired) {
// Mask reversal is only needed for non-all-one (null) masks, as reverse of
// a null all-one mask is a null mask.
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *Mask = State.get(getMask(), Part);
State.setDebugLocFrom(getDebugLoc());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewLI;
Value *Mask = nullptr;
if (auto *VPMask = getMask()) {
// Mask reversal is only needed for non-all-one (null) masks, as reverse
// of a null all-one mask is a null mask.
Mask = State.get(VPMask, Part);
if (isReverse())
Mask = Builder.CreateVectorReverse(Mask, "reverse");
BlockInMaskParts[Part] = Mask;
}
}

// Handle Stores:
if (SI) {
State.setDebugLocFrom(getDebugLoc());

for (unsigned Part = 0; Part < State.UF; ++Part) {
Instruction *NewSI = nullptr;
Value *StoredVal = State.get(StoredValue, Part);
// TODO: split this into several classes for better design.
if (State.EVL) {
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
"explicit vector length.");
assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
VPInstruction::ExplicitVectorLength &&
"EVL must be VPInstruction::ExplicitVectorLength.");
Value *EVL = State.get(State.EVL, VPIteration(0, 0));
// If EVL is not nullptr, then EVL must be a valid value set during plan
// creation, possibly default value = whole vector register length. EVL
// is created only if TTI prefers predicated vectorization, thus if EVL
// is not nullptr it also implies preference for predicated
// vectorization.
// FIXME: Support reverse store after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
NewSI = lowerStoreUsingVectorIntrinsics(
Builder, State.get(getAddr(), Part, !CreateGatherScatter),
StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
} else if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);
NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
MaskPart);
} else {
if (isReverse()) {
// If we store to reverse consecutive memory locations, then we need
// to reverse the order of elements in the stored value.
StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
// We don't want to update the value in the map as it might be used in
// another expression. So don't call resetVectorValue(StoredVal).
}
auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
if (isMaskRequired)
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
BlockInMaskParts[Part]);
else
NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
}
State.addMetadata(NewSI, SI);
}
return;
}

// Handle loads.
assert(LI && "Must have a load instruction");
State.setDebugLocFrom(getDebugLoc());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewLI;
// TODO: split this into several classes for better design.
if (State.EVL) {
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
@@ -9468,22 +9397,20 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// is not nullptr it also implies preference for predicated
// vectorization.
// FIXME: Support reverse loading after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
NewLI = lowerLoadUsingVectorIntrinsics(
Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
CreateGatherScatter, MaskPart, EVL, Alignment);
} else if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Builder, DataTy, State.get(getAddr(), Part, !CreateGather),
CreateGather, Mask, EVL, Alignment);
} else if (CreateGather) {
Value *VectorGep = State.get(getAddr(), Part);
NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, Mask,
nullptr, "wide.masked.gather");
State.addMetadata(NewLI, LI);
} else {
auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
if (isMaskRequired)
NewLI = Builder.CreateMaskedLoad(
DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
PoisonValue::get(DataTy), "wide.masked.load");
if (Mask)
NewLI = Builder.CreateMaskedLoad(DataTy, VecPtr, Alignment, Mask,
PoisonValue::get(DataTy),
"wide.masked.load");
else
NewLI =
Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
@@ -9494,7 +9421,69 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
}

State.set(getVPSingleValue(), NewLI, Part);
State.set(this, NewLI, Part);
}
}

void VPWidenStoreRecipe::execute(VPTransformState &State) {
auto *SI = cast<StoreInst>(&Ingredient);

VPValue *StoredVPValue = getStoredValue();
bool CreateScatter = !isConsecutive();
const Align Alignment = getLoadStoreAlignment(&Ingredient);

auto &Builder = State.Builder;
State.setDebugLocFrom(getDebugLoc());

for (unsigned Part = 0; Part < State.UF; ++Part) {
Instruction *NewSI = nullptr;
Value *Mask = nullptr;
if (auto *VPMask = getMask()) {
// Mask reversal is only needed for non-all-one (null) masks, as reverse
// of a null all-one mask is a null mask.
Mask = State.get(VPMask, Part);
if (isReverse())
Mask = Builder.CreateVectorReverse(Mask, "reverse");
}

Value *StoredVal = State.get(StoredVPValue, Part);
if (isReverse()) {
assert(!State.EVL && "reversing not yet implemented with EVL");
// If we store to reverse consecutive memory locations, then we need
// to reverse the order of elements in the stored value.
StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
// We don't want to update the value in the map as it might be used in
// another expression. So don't call resetVectorValue(StoredVal).
}
// TODO: split this into several classes for better design.
if (State.EVL) {
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
"explicit vector length.");
assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
VPInstruction::ExplicitVectorLength &&
"EVL must be VPInstruction::ExplicitVectorLength.");
Value *EVL = State.get(State.EVL, VPIteration(0, 0));
// If EVL is not nullptr, then EVL must be a valid value set during plan
// creation, possibly default value = whole vector register length. EVL
// is created only if TTI prefers predicated vectorization, thus if EVL
// is not nullptr it also implies preference for predicated
// vectorization.
// FIXME: Support reverse store after vp_reverse is added.
NewSI = lowerStoreUsingVectorIntrinsics(
Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal,
CreateScatter, Mask, EVL, Alignment);
} else if (CreateScatter) {
Value *VectorGep = State.get(getAddr(), Part);
NewSI =
Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, Mask);
} else {
auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
if (Mask)
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, Mask);
else
NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
}
State.addMetadata(NewSI, SI);
}
}

6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -69,9 +69,9 @@ class VPRecipeBuilder {
/// Check if the load or store instruction \p I should widened for \p
/// Range.Start and potentially masked. Such instructions are handled by a
/// recipe that takes an additional VPInstruction for the mask.
VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
ArrayRef<VPValue *> Operands,
VFRange &Range);
VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I,
ArrayRef<VPValue *> Operands,
VFRange &Range);

/// Check if an induction recipe should be constructed for \p Phi. If so build
/// and return it. If not, return null.
(Diffs for the remaining 11 changed files are not shown here.)