Commit

[VPlan] Split VPWidenMemoryInstructionRecipe (NFCI).
fhahn committed Apr 5, 2024
1 parent f0724f0 commit 2ec0e32
Showing 22 changed files with 283 additions and 221 deletions.
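
In short: the single VPWidenMemoryInstructionRecipe, which modeled both widened loads and widened stores and branched on isStore() at execution time, is split into a common VPWidenMemoryRecipe base class plus two concrete recipes, VPWidenLoadRecipe and VPWidenStoreRecipe ("NFCI" marks the change as intending no functional difference). Below is a minimal standalone sketch of the resulting shape, inferred from the constructor calls and accessors visible in the diff; the stand-in types, virtual dispatch, and plain data members are simplifications, not the actual VPlan.h declarations (real VPlan recipes use VPDef IDs and operand lists instead).

    // Stand-ins for the LLVM types involved; illustration only.
    struct Instruction {};
    struct LoadInst : Instruction {};
    struct StoreInst : Instruction {};
    struct VPValue {};
    struct DebugLoc {};

    // Common base: the scalar "ingredient" plus the traits shared by widened
    // loads and stores (address, optional mask, consecutive/reversed access).
    class VPWidenMemoryRecipe {
    protected:
      Instruction &Ingredient;
      VPValue *Addr;
      VPValue *Mask;
      bool Consecutive;
      bool Reverse;

      VPWidenMemoryRecipe(Instruction &I, VPValue *Addr, VPValue *Mask,
                          bool Consecutive, bool Reverse, DebugLoc /*DL*/)
          : Ingredient(I), Addr(Addr), Mask(Mask), Consecutive(Consecutive),
            Reverse(Reverse) {}

    public:
      virtual ~VPWidenMemoryRecipe() = default;
      VPValue *getAddr() const { return Addr; }
      VPValue *getMask() const { return Mask; }
      bool isConsecutive() const { return Consecutive; }
      bool isReverse() const { return Reverse; }
      virtual void execute() = 0; // each subclass emits only its own IR
    };

    // Widened load: defines the widened value itself; no stored operand.
    class VPWidenLoadRecipe : public VPWidenMemoryRecipe {
    public:
      VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                        bool Consecutive, bool Reverse, DebugLoc DL)
          : VPWidenMemoryRecipe(Load, Addr, Mask, Consecutive, Reverse, DL) {}
      void execute() override {} // wide load / masked load / gather
    };

    // Widened store: additionally carries the stored-value operand.
    class VPWidenStoreRecipe : public VPWidenMemoryRecipe {
      VPValue *StoredValue;

    public:
      VPWidenStoreRecipe(StoreInst &Store, VPValue *StoredValue, VPValue *Addr,
                         VPValue *Mask, bool Consecutive, bool Reverse,
                         DebugLoc DL)
          : VPWidenMemoryRecipe(Store, Addr, Mask, Consecutive, Reverse, DL),
            StoredValue(StoredValue) {}
      VPValue *getStoredValue() const { return StoredValue; }
      void execute() override {} // wide store / masked store / scatter
    };
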
167 changes: 88 additions & 79 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8095,7 +8095,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
BlockMaskCache[BB] = BlockMask;
}

VPWidenMemoryInstructionRecipe *
VPWidenMemoryRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@@ -8140,12 +8140,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
Ptr = VectorPtr;
}
if (LoadInst *Load = dyn_cast<LoadInst>(I))
return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
Reverse, I->getDebugLoc());
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
I->getDebugLoc());

StoreInst *Store = cast<StoreInst>(I);
return new VPWidenMemoryInstructionRecipe(
*Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc());
return new VPWidenStoreRecipe(*Store, Operands[0], Ptr, Mask, Consecutive,
Reverse, I->getDebugLoc());
}

/// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
@@ -8780,13 +8780,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// for this VPlan, replace the Recipes widening its memory instructions with a
// single VPInterleaveRecipe at its insertion point.
for (const auto *IG : InterleaveGroups) {
auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
RecipeBuilder.getRecipe(IG->getInsertPos()));
auto *Recipe =
cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos()));
SmallVector<VPValue *, 4> StoredValues;
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
auto *StoreR =
cast<VPWidenMemoryInstructionRecipe>(RecipeBuilder.getRecipe(SI));
auto *StoreR = cast<VPWidenStoreRecipe>(RecipeBuilder.getRecipe(SI));
StoredValues.push_back(StoreR->getStoredValue());
}

@@ -9464,27 +9463,19 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
return Call;
}

void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;

void VPWidenLoadRecipe::execute(VPTransformState &State) {
// Attempt to issue a wide load.
LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);

assert((LI || SI) && "Invalid Load/Store instruction");
assert((!SI || StoredValue) && "No stored value provided for widened store");
assert((!LI || !StoredValue) && "Stored value provided for widened load");
auto *LI = cast<LoadInst>(&Ingredient);

Type *ScalarDataTy = getLoadStoreType(&Ingredient);

auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
const Align Alignment = getLoadStoreAlignment(&Ingredient);
bool CreateGatherScatter = !isConsecutive();
bool CreateGather = !isConsecutive();

auto &Builder = State.Builder;
InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
bool isMaskRequired = getMask();
if (isMaskRequired) {
bool IsMaskRequired = getMask();
if (IsMaskRequired) {
// Mask reversal is only needed for non-all-one (null) masks, as reverse of
// a null all-one mask is a null mask.
for (unsigned Part = 0; Part < State.UF; ++Part) {
@@ -9495,56 +9486,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
}
}

// Handle Stores:
if (SI) {
State.setDebugLocFrom(getDebugLoc());

for (unsigned Part = 0; Part < State.UF; ++Part) {
Instruction *NewSI = nullptr;
Value *StoredVal = State.get(StoredValue, Part);
// TODO: split this into several classes for better design.
if (State.EVL) {
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
"explicit vector length.");
assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
VPInstruction::ExplicitVectorLength &&
"EVL must be VPInstruction::ExplicitVectorLength.");
Value *EVL = State.get(State.EVL, VPIteration(0, 0));
// If EVL is not nullptr, then EVL must be a valid value set during plan
// creation, possibly default value = whole vector register length. EVL
// is created only if TTI prefers predicated vectorization, thus if EVL
// is not nullptr it also implies preference for predicated
// vectorization.
// FIXME: Support reverse store after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
NewSI = lowerStoreUsingVectorIntrinsics(
Builder, State.get(getAddr(), Part, !CreateGatherScatter),
StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
} else if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);
NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
MaskPart);
} else {
if (isReverse()) {
// If we store to reverse consecutive memory locations, then we need
// to reverse the order of elements in the stored value.
StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
// We don't want to update the value in the map as it might be used in
// another expression. So don't call resetVectorValue(StoredVal).
}
auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
if (isMaskRequired)
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
BlockInMaskParts[Part]);
else
NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
}
State.addMetadata(NewSI, SI);
}
return;
}

// Handle loads.
assert(LI && "Must have a load instruction");
State.setDebugLocFrom(getDebugLoc());
@@ -9564,19 +9505,19 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// is not nullptr it also implies preference for predicated
// vectorization.
// FIXME: Support reverse loading after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *MaskPart = IsMaskRequired ? BlockInMaskParts[Part] : nullptr;
NewLI = lowerLoadUsingVectorIntrinsics(
Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
CreateGatherScatter, MaskPart, EVL, Alignment);
} else if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Builder, DataTy, State.get(getAddr(), Part, !CreateGather),
CreateGather, MaskPart, EVL, Alignment);
} else if (CreateGather) {
Value *MaskPart = IsMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);
NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
nullptr, "wide.masked.gather");
State.addMetadata(NewLI, LI);
} else {
auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
if (isMaskRequired)
if (IsMaskRequired)
NewLI = Builder.CreateMaskedLoad(
DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
PoisonValue::get(DataTy), "wide.masked.load");
@@ -9590,7 +9531,75 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
}

State.set(getVPSingleValue(), NewLI, Part);
State.set(this, NewLI, Part);
}
}

void VPWidenStoreRecipe::execute(VPTransformState &State) {
auto *SI = cast<StoreInst>(&Ingredient);

VPValue *StoredValue = getStoredValue();
bool CreateScatter = !isConsecutive();
const Align Alignment = getLoadStoreAlignment(&Ingredient);

auto &Builder = State.Builder;
InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
bool IsMaskRequired = getMask();
if (IsMaskRequired) {
// Mask reversal is only needed for non-all-one (null) masks, as reverse of
// a null all-one mask is a null mask.
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *Mask = State.get(getMask(), Part);
if (isReverse())
Mask = Builder.CreateVectorReverse(Mask, "reverse");
BlockInMaskParts[Part] = Mask;
}
}

State.setDebugLocFrom(getDebugLoc());

for (unsigned Part = 0; Part < State.UF; ++Part) {
Instruction *NewSI = nullptr;
Value *StoredVal = State.get(StoredValue, Part);
// TODO: split this into several classes for better design.
if (State.EVL) {
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
"explicit vector length.");
assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
VPInstruction::ExplicitVectorLength &&
"EVL must be VPInstruction::ExplicitVectorLength.");
Value *EVL = State.get(State.EVL, VPIteration(0, 0));
// If EVL is not nullptr, then EVL must be a valid value set during plan
// creation, possibly default value = whole vector register length. EVL
// is created only if TTI prefers predicated vectorization, thus if EVL
// is not nullptr it also implies preference for predicated
// vectorization.
// FIXME: Support reverse store after vp_reverse is added.
Value *MaskPart = IsMaskRequired ? BlockInMaskParts[Part] : nullptr;
NewSI = lowerStoreUsingVectorIntrinsics(
Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal,
CreateScatter, MaskPart, EVL, Alignment);
} else if (CreateScatter) {
Value *MaskPart = IsMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);
NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
MaskPart);
} else {
if (isReverse()) {
// If we store to reverse consecutive memory locations, then we need
// to reverse the order of elements in the stored value.
StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
// We don't want to update the value in the map as it might be used in
// another expression. So don't call resetVectorValue(StoredVal).
}
auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
if (IsMaskRequired)
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
BlockInMaskParts[Part]);
else
NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
}
State.addMetadata(NewSI, SI);
}
}
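
A practical effect of the split, visible in the interleave-group hunk above: callers no longer cast to one catch-all recipe and branch on load-vs-store, but dispatch on the concrete type, as the patch does with cast<VPWidenStoreRecipe>. A hypothetical consumer in the same standalone sketch from the top of the page (the patch uses LLVM's isa<>/dyn_cast<>; dynamic_cast stands in here because the sketch lacks LLVM-style RTTI, and this is not code from the patch):

    #include <cstdio>

    // Hypothetical dispatch over the split recipe types; continues the
    // standalone sketch above. Illustration only.
    void dispatch(VPWidenMemoryRecipe &R) {
      if (auto *Load = dynamic_cast<VPWidenLoadRecipe *>(&R)) {
        // A load recipe defines the widened value itself.
        std::printf("widened load, consecutive=%d\n", Load->isConsecutive());
      } else if (auto *Store = dynamic_cast<VPWidenStoreRecipe *>(&R)) {
        // A store recipe exposes its stored operand directly.
        std::printf("widened store, stored value at %p\n",
                    (void *)Store->getStoredValue());
      }
    }
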

6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -69,9 +69,9 @@ class VPRecipeBuilder {
/// Check if the load or store instruction \p I should widened for \p
/// Range.Start and potentially masked. Such instructions are handled by a
/// recipe that takes an additional VPInstruction for the mask.
VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
ArrayRef<VPValue *> Operands,
VFRange &Range);
VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I,
ArrayRef<VPValue *> Operands,
VFRange &Range);

/// Check if an induction recipe should be constructed for \p Phi. If so build
/// and return it. If not, return null.