Conversation
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-risc-v Author: Luke Lau (lukel97). Changes: Stacked on #183065. This PR removes the runtime IV overflow checks in the iteration check to show that they are dead across targets. Only the target-independent tests are affected, but #183065 makes force-target-supports-scalable-vectors imply a power-of-2 vscale. Full diff: https://github.com/llvm/llvm-project/pull/183066.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 77be8cc95b6da..1726963f43e32 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -25717,9 +25717,6 @@ bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
// We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
// of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
// a power of two as well.
- // FIXME: This doesn't work for zve32, but that's already broken
- // elsewhere for the same reason.
- assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
static_assert(RISCV::RVVBitsPerBlock == 64,
"RVVBitsPerBlock changed, audit needed");
return true;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b28c3d949c96a..81867bd112d15 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -296,9 +296,9 @@ cl::opt<unsigned> llvm::ForceTargetInstructionCost(
static cl::opt<bool> ForceTargetSupportsScalableVectors(
"force-target-supports-scalable-vectors", cl::init(false), cl::Hidden,
- cl::desc(
- "Pretend that scalable vectors are supported, even if the target does "
- "not support them. This flag should only be used for testing."));
+ cl::desc("Pretend that scalable vectors are supported and vscale is a "
+ "power of two, even if the target does "
+ "not support them. This flag should only be used for testing."));
static cl::opt<unsigned> SmallLoopCost(
"small-loop-cost", cl::init(20), cl::Hidden,
@@ -2383,20 +2383,6 @@ Value *EpilogueVectorizerMainLoop::createIterationCountCheck(
// check is known to be true, or known to be false.
CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
} // else step known to be < trip count, use CheckMinIters preset to false.
- } else if (VF.isScalable() && !TTI->isVScaleKnownToBeAPowerOfTwo() &&
- !isIndvarOverflowCheckKnownFalse(Cost, VF, UF) &&
- Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
- // vscale is not necessarily a power-of-2, which means we cannot guarantee
- // an overflow to zero when updating induction variables and so an
- // additional overflow check is required before entering the vector loop.
-
- // Get the maximum unsigned value for the type.
- Value *MaxUIntTripCount =
- ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask());
- Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count);
-
- // Don't execute the vector loop if (UMax - n) < (VF * UF).
- CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, CreateStep());
}
return CheckMinIters;
}
@@ -3387,6 +3373,10 @@ bool LoopVectorizationCostModel::isScalableVectorizationAllowed() {
if (!TTI.supportsScalableVectors() && !ForceTargetSupportsScalableVectors)
return false;
+ if (!TTI.isVScaleKnownToBeAPowerOfTwo() &&
+ !ForceTargetSupportsScalableVectors)
+ return false;
+
if (Hints->isScalableVectorizationDisabled()) {
reportVectorizationInfo("Scalable vectorization is explicitly disabled",
"ScalableVectorizationDisabled", ORE, TheLoop);
@@ -8693,22 +8683,14 @@ void LoopVectorizationPlanner::attachRuntimeChecks(
void LoopVectorizationPlanner::addMinimumIterationCheck(
VPlan &Plan, ElementCount VF, unsigned UF,
ElementCount MinProfitableTripCount) const {
- // vscale is not necessarily a power-of-2, which means we cannot guarantee
- // an overflow to zero when updating induction variables and so an
- // additional overflow check is required before entering the vector loop.
- bool IsIndvarOverflowCheckNeededForVF =
- VF.isScalable() && !TTI.isVScaleKnownToBeAPowerOfTwo() &&
- !isIndvarOverflowCheckKnownFalse(&CM, VF, UF) &&
- CM.getTailFoldingStyle() !=
- TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
- const uint32_t *BranchWeigths =
+ const uint32_t *BranchWeights =
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())
? &MinItersBypassWeights[0]
: nullptr;
VPlanTransforms::addMinimumIterationCheck(
Plan, VF, UF, MinProfitableTripCount,
CM.requiresScalarEpilogue(VF.isVector()), CM.foldTailByMasking(),
- IsIndvarOverflowCheckNeededForVF, OrigLoop, BranchWeigths,
+ OrigLoop, BranchWeights,
OrigLoop->getLoopPredecessor()->getTerminator()->getDebugLoc(), PSE);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 1af7392b904da..bfcc8a009b321 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -1042,9 +1042,8 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
void VPlanTransforms::addMinimumIterationCheck(
VPlan &Plan, ElementCount VF, unsigned UF,
ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
- bool TailFolded, bool CheckNeededWithTailFolding, Loop *OrigLoop,
- const uint32_t *MinItersBypassWeights, DebugLoc DL,
- PredicatedScalarEvolution &PSE) {
+ bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
+ DebugLoc DL, PredicatedScalarEvolution &PSE) {
// Generate code to check if the loop's trip count is less than VF * UF, or
// equal to it in case a scalar epilogue is required; this implies that the
// vector trip count is zero. This check also covers the case where adding one
@@ -1075,30 +1074,9 @@ void VPlanTransforms::addMinimumIterationCheck(
VPBuilder Builder(EntryVPBB);
VPValue *TripCountCheck = Plan.getFalse();
const SCEV *Step = GetMinTripCount();
- if (TailFolded) {
- if (CheckNeededWithTailFolding) {
- // vscale is not necessarily a power-of-2, which means we cannot guarantee
- // an overflow to zero when updating induction variables and so an
- // additional overflow check is required before entering the vector loop.
-
- VPValue *StepVPV = Builder.createExpandSCEV(Step);
-
- // Get the maximum unsigned value for the type.
- VPValue *MaxUIntTripCount =
- Plan.getConstantInt(cast<IntegerType>(TripCountTy)->getMask());
- VPValue *DistanceToMax =
- Builder.createSub(MaxUIntTripCount, TripCountVPV);
-
- // Don't execute the vector loop if (UMax - n) < (VF * UF).
- // FIXME: Should only check VF * UF, but currently checks Step=max(VF*UF,
- // minProfitableTripCount).
- TripCountCheck =
- Builder.createICmp(ICmpInst::ICMP_ULT, DistanceToMax, StepVPV, DL);
- } else {
- // TripCountCheck = false, folding tail implies positive vector trip
- // count.
- }
- } else {
+ // TripCountCheck = false, folding tail implies positive vector trip
+ // count.
+ if (!TailFolded) {
// TODO: Emit unconditional branch to vector preheader instead of
// conditional branch with known condition.
TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 7c5d44daf003f..f0af6360bf9e4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -158,13 +158,11 @@ struct VPlanTransforms {
bool TailFolded);
// Create a check to \p Plan to see if the vector loop should be executed.
- static void
- addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
- ElementCount MinProfitableTripCount,
- bool RequiresScalarEpilogue, bool TailFolded,
- bool CheckNeededWithTailFolding, Loop *OrigLoop,
- const uint32_t *MinItersBypassWeights, DebugLoc DL,
- PredicatedScalarEvolution &PSE);
+ static void addMinimumIterationCheck(
+ VPlan &Plan, ElementCount VF, unsigned UF,
+ ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
+ bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
+ DebugLoc DL, PredicatedScalarEvolution &PSE);
/// Add a check to \p Plan to see if the epilogue vector loop should be
/// executed.
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
index 65d3e7e7cbdf4..00630d7bbf301 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
@@ -9,10 +9,7 @@
define void @foo(i32 %val, ptr dereferenceable(1024) %ptr) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 -257, [[TMP7]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
@@ -27,16 +24,7 @@ define void @foo(i32 %val, ptr dereferenceable(1024) %ptr) {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]]
-; CHECK: scalar.ph:
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
-; CHECK: while.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
-; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], 256
-; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: while.end.loopexit:
; CHECK-NEXT: ret void
;
@@ -60,11 +48,7 @@ define void @foo2(i32 %val, ptr dereferenceable(1024) %ptr, i64 %n) {
; CHECK-LABEL: @foo2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[UMAX]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
-; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
@@ -77,18 +61,9 @@ define void @foo2(i32 %val, ptr dereferenceable(1024) %ptr, i64 %n) {
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]]
-; CHECK: scalar.ph:
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
-; CHECK: while.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
-; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: while.end.loopexit:
; CHECK-NEXT: ret void
;
|
artagnon
left a comment
There was a problem hiding this comment.
Very nice cleanup that LGTM, thanks!
| "not support them. This flag should only be used for testing.")); | ||
| cl::desc("Pretend that scalable vectors are supported and vscale is a " | ||
| "power of two, even if the target does " | ||
| "not support them. This flag should only be used for testing.")); |
There was a problem hiding this comment.
| "not support them. This flag should only be used for testing.")); | |
| cl::desc("Pretend that scalable vectors are supported, even if the target does " |
This needs updating now I think
There was a problem hiding this comment.
Yup, waiting for #183292 to land first and then will rebase.
There was a problem hiding this comment.
Rebased so this diff is gone
… NFC The IV can no longer overflow with tail folding after llvm#183080.
ec422fc to
bd21bc9
Compare
… NFC (llvm#183066) The IV can no longer overflow with tail folding after llvm#183080.
The IV can no longer overflow with tail folding after #183080.