15 changes: 12 additions & 3 deletions llvm/include/llvm/IR/VectorTypeUtils.h
@@ -14,6 +14,11 @@

namespace llvm {

/// Returns true if \p IID is a vector intrinsic that returns a struct with a
/// scalar element at index \p EleIdx.
LLVM_ABI bool isVectorIntrinsicWithStructReturnScalarAtField(unsigned IID,
unsigned EleIdx);

/// A helper function for converting Scalar types to vector types. If
/// the incoming type is void, we return void. If the EC represents a
/// scalar, we return the scalar type.
@@ -31,7 +36,11 @@ inline Type *toVectorTy(Type *Scalar, unsigned VF) {
/// Note:
/// - If \p EC is scalar, \p StructTy is returned unchanged
/// - Only unpacked literal struct types are supported
LLVM_ABI Type *toVectorizedStructTy(StructType *StructTy, ElementCount EC);
/// - If \p IID (Intrinsic ID) is provided, fields that the intrinsic returns
///   as scalars are kept unchanged; all other fields are widened to
///   vector types.
LLVM_ABI Type *toVectorizedStructTy(StructType *StructTy, ElementCount EC,
unsigned IID = 0);

/// A helper for converting structs of vector types to structs of scalar types.
/// Note: Only unpacked literal struct types are supported.
@@ -52,9 +61,9 @@ LLVM_ABI bool canVectorizeStructTy(StructType *StructTy);
/// - If the incoming type is void, we return void
/// - If \p EC is scalar, \p Ty is returned unchanged
/// - Only unpacked literal struct types are supported
inline Type *toVectorizedTy(Type *Ty, ElementCount EC) {
inline Type *toVectorizedTy(Type *Ty, ElementCount EC, unsigned IID = 0) {
if (StructType *StructTy = dyn_cast<StructType>(Ty))
return toVectorizedStructTy(StructTy, EC);
return toVectorizedStructTy(StructTy, EC, IID);
return toVectorTy(Ty, EC);
}
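A minimal sketch of the new IID parameter in action (not part of the patch; Ctx is an assumed LLVMContext): for the struct return of vp.load.ff, only the data field is widened, since field 1 is the scalar lane count.

Type *I32 = Type::getInt32Ty(Ctx);
// { i32, i32 } models the scalar return of vp.load.ff: loaded value + count.
StructType *STy = StructType::get(Ctx, {I32, I32});
Type *Wide = toVectorizedTy(STy, ElementCount::getScalable(4),
                            Intrinsic::vp_load_ff);
// Wide == { <vscale x 4 x i32>, i32 }: field 0 is widened, field 1 is kept
// scalar because isVectorIntrinsicWithStructReturnScalarAtField(IID, 1) holds.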

8 changes: 8 additions & 0 deletions llvm/lib/Analysis/VectorUtils.cpp
@@ -175,6 +175,8 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
return (ScalarOpdIdx == 2);
case Intrinsic::experimental_vp_splice:
return ScalarOpdIdx == 2 || ScalarOpdIdx == 4;
case Intrinsic::vp_load_ff:
return ScalarOpdIdx == 0 || ScalarOpdIdx == 2;
default:
return false;
}
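For context, these indices encode vp.load.ff's operand layout, in which only the mask is per-lane. A quick self-check (a sketch assuming the TTI-taking overload, passing a null TTI since vp.load.ff is target-independent):

assert(isVectorIntrinsicWithScalarOpAtArg(Intrinsic::vp_load_ff, 0, nullptr) &&
       "pointer operand stays scalar");
assert(!isVectorIntrinsicWithScalarOpAtArg(Intrinsic::vp_load_ff, 1, nullptr) &&
       "mask is widened per lane");
assert(isVectorIntrinsicWithScalarOpAtArg(Intrinsic::vp_load_ff, 2, nullptr) &&
       "EVL operand stays scalar");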
@@ -212,6 +214,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
case Intrinsic::powi:
case Intrinsic::ldexp:
return OpdIdx == -1 || OpdIdx == 1;
case Intrinsic::vp_load_ff:
return OpdIdx == 0;
default:
return OpdIdx == -1;
}
@@ -224,6 +228,10 @@ bool llvm::isVectorIntrinsicWithStructReturnOverloadAtField(
return TTI->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);

switch (ID) {
case Intrinsic::modf:
case Intrinsic::sincos:
case Intrinsic::sincospi:
return false;
case Intrinsic::frexp:
return RetIdx == 0 || RetIdx == 1;
default:
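Why frexp answers differently from the pairs above: both of its struct return fields participate in the vector name mangling, whereas sincos, sincospi, and modf mangle on a single type. A sketch (assuming a Module M and an LLVMContext Ctx):

auto *V4F32 = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
auto *V4I32 = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
// frexp: both return fields are overload types.
Function *Frexp =
    Intrinsic::getOrInsertDeclaration(&M, Intrinsic::frexp, {V4F32, V4I32});
// Frexp->getName() == "llvm.frexp.v4f32.v4i32"
// sincos: a single overload type covers both fields.
Function *Sincos =
    Intrinsic::getOrInsertDeclaration(&M, Intrinsic::sincos, {V4F32});
// Sincos->getName() == "llvm.sincos.v4f32"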
16 changes: 14 additions & 2 deletions llvm/lib/IR/VectorTypeUtils.cpp
@@ -8,12 +8,21 @@

#include "llvm/IR/VectorTypeUtils.h"
#include "llvm/ADT/SmallVectorExtras.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

bool llvm::isVectorIntrinsicWithStructReturnScalarAtField(unsigned IID,
unsigned EleIdx) {
if (IID == Intrinsic::vp_load_ff)
return EleIdx == 1;
return false;
}

/// A helper for converting structs of scalar types to structs of vector types.
/// Note: Only unpacked literal struct types are supported.
Type *llvm::toVectorizedStructTy(StructType *StructTy, ElementCount EC) {
Type *llvm::toVectorizedStructTy(StructType *StructTy, ElementCount EC,
unsigned IID) {
if (EC.isScalar())
return StructTy;
assert(isUnpackedStructLiteral(StructTy) &&
@@ -22,7 +31,10 @@ Type *llvm::toVectorizedStructTy(StructType *StructTy, ElementCount EC) {
"expected all element types to be valid vector element types");
return StructType::get(
StructTy->getContext(),
map_to_vector(StructTy->elements(), [&](Type *ElTy) -> Type * {
map_to_vector(enumerate(StructTy->elements()), [&](auto It) -> Type * {
Type *ElTy = It.value();
if (isVectorIntrinsicWithStructReturnScalarAtField(IID, It.index()))
return ElTy;
return VectorType::get(ElTy, EC);
}));
}
61 changes: 57 additions & 4 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -399,6 +399,12 @@ static cl::opt<bool> EnableEarlyExitVectorization(
cl::desc(
"Enable vectorization of early exit loops with uncountable exits."));

static cl::opt<bool>
EnableEarlyExitWithFFLoads("enable-early-exit-with-ffload", cl::init(false),
Review comment (Contributor):
In the spirit of incremental development, can we remove this option and just have it on by default?

Review comment (Contributor):
Does it work for other targets besides RISCV? If so, the PR should have some tests for other backends too, or, even better, in the top-level Transforms/LoopVectorize directory.

cl::Hidden,
cl::desc("Enable vectorization of early-exit "
"loops with fault-only-first loads."));

static cl::opt<bool> ConsiderRegPressure(
"vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,
cl::desc("Discard VFs if their register pressure is too high."));
@@ -3551,6 +3557,15 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
return FixedScalableVFPair::getNone();
}

if (!Legal->getPotentiallyFaultingLoads().empty() && UserIC > 1) {
reportVectorizationFailure("Auto-vectorization of loops with potentially "
"faulting loads is not supported when the "
"interleave count is more than 1",
"CantInterleaveLoopWithPotentiallyFaultingLoads",
ORE, TheLoop);
return FixedScalableVFPair::getNone();
}

ScalarEvolution *SE = PSE.getSE();
ElementCount TC = getSmallConstantTripCount(SE, TheLoop);
unsigned MaxTC = PSE.getSmallConstantMaxTripCount();
@@ -4163,7 +4178,11 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
Type *ScalarTy = TypeInfo.inferScalarType(ToCheck);
if (!Visited.insert({ScalarTy}).second)
continue;
Type *WideTy = toVectorizedTy(ScalarTy, VF);
unsigned IID = 0;
if (auto *WI = dyn_cast<VPWidenIntrinsicRecipe>(&R))
  IID = WI->getVectorIntrinsicID();
Type *WideTy = toVectorizedTy(ScalarTy, VF, IID);

if (any_of(getContainedTypes(WideTy), WillGenerateTargetVectors))
return true;
}
@@ -4626,6 +4645,10 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
if (!Legal->isSafeForAnyVectorWidth())
return 1;

// No interleaving for potentially faulting loads.
if (!Legal->getPotentiallyFaultingLoads().empty())
return 1;

// We don't attempt to perform interleaving for loops with uncountable early
// exits because the VPInstruction::AnyOf code cannot currently handle
// multiple parts.
@@ -7382,6 +7405,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// Regions are dissolved after optimizing for VF and UF, which completely
// removes unneeded loop regions first.
VPlanTransforms::dissolveLoopRegions(BestVPlan);

VPlanTransforms::convertFFLoadEarlyExitToVLStepping(BestVPlan);

// Canonicalize EVL loops after regions are dissolved.
VPlanTransforms::canonicalizeEVLLoops(BestVPlan);
VPlanTransforms::materializeBackedgeTakenCount(BestVPlan, VectorPH);
@@ -7610,8 +7636,8 @@ void EpilogueVectorizerEpilogueLoop::printDebugTracesAtEnd() {
});
}

VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
VFRange &Range) {
VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
VFRange &Range) {
assert((VPI->getOpcode() == Instruction::Load ||
VPI->getOpcode() == Instruction::Store) &&
"Must be called with either a load or store");
@@ -7672,6 +7698,23 @@ VPWidenMemoryRecipe *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
Builder.insert(VectorPtr);
Ptr = VectorPtr;
}

if (Legal->getPotentiallyFaultingLoads().contains(I)) {
  // Widen to a fault-only-first load: vp.load.ff returns a struct of the
  // loaded vector and an i32 count of the lanes known not to fault.
  auto *I32Ty = IntegerType::getInt32Ty(Plan.getContext());
  auto *RetTy = StructType::get(I->getType(), I32Ty);
  DebugLoc DL = I->getDebugLoc();
  // vp.load.ff is always masked; use an all-true mask for unmasked loads.
  if (!Mask)
    Mask = Plan.getOrAddLiveIn(
        ConstantInt::getTrue(IntegerType::getInt1Ty(Plan.getContext())));
  auto *FFLoad = new VPWidenIntrinsicRecipe(Intrinsic::vp_load_ff,
                                            {Ptr, Mask, &Plan.getVF()}, RetTy,
                                            *VPI, *VPI, DL);
  Builder.insert(FFLoad);
  // Users of the original load consume field 0 (the data vector); the
  // faulting-lane count in field 1 is consumed by the early-exit rewrite.
  VPValue *Zero = Plan.getConstantInt(32, 0);
  return new VPInstruction(VPInstruction::ExtractVectorValue, {FFLoad, Zero},
                           {}, {}, DL);
}

if (VPI->getOpcode() == Instruction::Load) {
auto *Load = cast<LoadInst>(I);
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse, *VPI,
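To make the fault-only-first path above concrete: once the recipe pair executes, the emitted IR is, schematically, a masked vp.load.ff call plus an extractvalue of field 0. An IRBuilder equivalent (a sketch only; Builder, WideTy, Ptr, Mask, and EVL are assumed to be in scope, and the overload set {data type, pointer type} is inferred from vp.load.ff's mangling):

Value *FF = Builder.CreateIntrinsic(Intrinsic::vp_load_ff,
                                    {WideTy, Ptr->getType()},
                                    {Ptr, Mask, EVL});
Value *Data = Builder.CreateExtractValue(FF, 0);  // feeds the load's users
Value *Known = Builder.CreateExtractValue(FF, 1); // lanes known not to fault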
@@ -8617,6 +8660,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
if (!VPlanTransforms::runPass(VPlanTransforms::handleMultiUseReductions,
*Plan))
return nullptr;

VPlanTransforms::adjustFFLoadEarlyExitForPoisonSafety(*Plan);

// Apply mandatory transformation to handle FP maxnum/minnum reduction with
// NaNs if possible, bail out otherwise.
if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
@@ -9948,7 +9994,14 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}

if (!LVL.getPotentiallyFaultingLoads().empty()) {
if (EnableEarlyExitWithFFLoads) {
if (LVL.getPotentiallyFaultingLoads().size() > 1) {
reportVectorizationFailure("Auto-vectorization of loops with more than 1 "
"potentially faulting load is not enabled",
"MoreThanOnePotentiallyFaultingLoad", ORE, L);
return false;
}
} else if (!LVL.getPotentiallyFaultingLoads().empty()) {
reportVectorizationFailure("Auto-vectorization of loops with potentially "
"faulting load is not supported",
"PotentiallyFaultingLoadsNotSupported", ORE, L);
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -92,7 +92,7 @@ class VPRecipeBuilder {
/// Check if the load or store instruction \p VPI should be widened for \p
/// Range.Start and potentially masked. Such instructions are handled by a
/// recipe that takes an additional VPInstruction for the mask.
VPWidenMemoryRecipe *tryToWidenMemory(VPInstruction *VPI, VFRange &Range);
VPRecipeBase *tryToWidenMemory(VPInstruction *VPI, VFRange &Range);

/// Check if an induction recipe should be constructed for \p VPI. If so build
/// and return it. If not, return null.
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1125,6 +1125,10 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// The lane specifies an index into a vector formed by combining all vector
/// operands (all operands after the first one).
ExtractLane,
/// Extracts a scalar value from an aggregate value.
ExtractScalarValue,
/// Extracts a vector value from an aggregate value.
ExtractVectorValue,
/// Explicit user for the resume phi of the canonical induction in the main
/// VPlan, used by the epilogue vector loop.
ResumeForEpilogue,
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -141,6 +141,13 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::BranchOnCond:
case VPInstruction::BranchOnCount:
return Type::getVoidTy(Ctx);
case VPInstruction::ExtractScalarValue:
case VPInstruction::ExtractVectorValue: {
assert(R->getNumOperands() == 2 && "expected single-level extractvalue");
auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0)));
auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue());
return StructTy->getTypeAtIndex(CI->getZExtValue());
}
default:
break;
}
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -237,7 +237,8 @@ struct VPTransformState {
set(Def, V, VPLane(0));
return;
}
assert((VF.isScalar() || isVectorizedTy(V->getType())) &&
assert((VF.isScalar() || isVectorizedTy(V->getType()) ||
V->getType()->isStructTy()) &&
"scalar values must be stored as (0, 0)");
Data.VPV2Vector[Def] = V;
}
30 changes: 28 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -453,6 +453,8 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::BranchOnCount:
case VPInstruction::ComputeReductionResult:
case VPInstruction::ExtractLane:
case VPInstruction::ExtractScalarValue:
case VPInstruction::ExtractVectorValue:
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::LogicalAnd:
case VPInstruction::PtrAdd:
@@ -832,6 +834,13 @@ Value *VPInstruction::generate(VPTransformState &State) {
Res->setName(Name);
return Res;
}
case VPInstruction::ExtractVectorValue:
case VPInstruction::ExtractScalarValue: {
assert(getNumOperands() == 2 && "expected single-level extractvalue");
Value *Op = State.get(getOperand(0));
auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
return Builder.CreateExtractValue(Op, CI->getZExtValue());
}
case VPInstruction::LogicalAnd: {
Value *A = State.get(getOperand(0));
Value *B = State.get(getOperand(1));
@@ -1138,6 +1147,7 @@ bool VPInstruction::isVectorToScalar() const {
bool VPInstruction::isSingleScalar() const {
switch (getOpcode()) {
case Instruction::PHI:
case VPInstruction::ExtractScalarValue:
case VPInstruction::ExplicitVectorLength:
case VPInstruction::ResumeForEpilogue:
case VPInstruction::VScale:
@@ -1349,6 +1359,12 @@ void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractPenultimateElement:
O << "extract-penultimate-element";
break;
case VPInstruction::ExtractScalarValue:
O << "extract-scalar-value";
break;
case VPInstruction::ExtractVectorValue:
O << "extract-vector-value";
break;
case VPInstruction::ComputeAnyOfResult:
O << "compute-anyof-result";
break;
@@ -1695,7 +1711,16 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {

SmallVector<Type *, 2> TysForDecl;
// Add return type if intrinsic is overloaded on it.
if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
if (ResultTy->isStructTy()) {
auto *StructTy = cast<StructType>(ResultTy);
for (unsigned I = 0, E = StructTy->getNumElements(); I != E; ++I) {
if (isVectorIntrinsicWithStructReturnOverloadAtField(VectorIntrinsicID, I,
State.TTI))
TysForDecl.push_back(
toVectorizedTy(StructTy->getStructElementType(I), State.VF));
}
} else if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1,
State.TTI))
TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
SmallVector<Value *, 4> Args;
for (const auto &I : enumerate(operands())) {
@@ -1760,7 +1785,8 @@ static InstructionCost getCostForIntrinsics(Intrinsic::ID ID,
}

Type *ScalarRetTy = Ctx.Types.inferScalarType(&R);
Type *RetTy = VF.isVector() ? toVectorizedTy(ScalarRetTy, VF) : ScalarRetTy;
Type *RetTy =
VF.isVector() ? toVectorizedTy(ScalarRetTy, VF, ID) : ScalarRetTy;
SmallVector<Type *> ParamTys;
for (const VPValue *Op : Operands) {
ParamTys.push_back(VF.isVector()