[LLVM][TTI] Remove the isVScaleKnownToBeAPowerOfTwo hook. #183292
[LLVM][TTI] Remove the isVScaleKnownToBeAPowerOfTwo hook. #183292 paulwalker-arm merged 2 commits into llvm:main from
Conversation
Also removes target overrides and a test that's no longer applicable.
|
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-llvm-analysis Author: Paul Walker (paulwalker-arm) Changes: After #183080 this is no longer a configurable property. NOTE: No test changes are expected beyond llvm/test/Transforms/LoopVectorize/scalable-predication.ll, which has been removed because it validated the now-unsupported functionality. Full diff: https://github.com/llvm/llvm-project/pull/183292.diff 13 Files Affected:
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index a7fb0efedadde..18ae6a005d972 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1358,9 +1358,6 @@ class TargetTransformInfo {
/// \return the value of vscale to tune the cost model for.
LLVM_ABI std::optional<unsigned> getVScaleForTuning() const;
- /// \return true if vscale is known to be a power of 2
- LLVM_ABI bool isVScaleKnownToBeAPowerOfTwo() const;
-
/// \return True if the vectorization factor should be chosen to
/// make the vector of the smallest element type match the size of a
/// vector register. For wider element types, this could result in
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 454be56aed6cc..e062b70be6b59 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -644,7 +644,6 @@ class TargetTransformInfoImplBase {
virtual std::optional<unsigned> getVScaleForTuning() const {
return std::nullopt;
}
- virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
virtual bool
shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 68874c59be4b8..6dcb6f0062a08 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -889,7 +889,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
std::optional<unsigned> getVScaleForTuning() const override {
return std::nullopt;
}
- bool isVScaleKnownToBeAPowerOfTwo() const override { return false; }
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the demanded result elements need to be inserted and/or
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 7964bfd81d704..4b60c3f905120 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -623,9 +623,6 @@ class LLVM_ABI TargetLoweringBase {
return BypassSlowDivWidths;
}
- /// Return true only if vscale must be a power of two.
- virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
-
/// Return true if Flow Control is an expensive operation that should be
/// avoided.
bool isJumpExpensive() const { return JumpIsExpensive; }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 0e745a978656b..0f97edc424d7e 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -837,10 +837,6 @@ std::optional<unsigned> TargetTransformInfo::getVScaleForTuning() const {
return TTIImpl->getVScaleForTuning();
}
-bool TargetTransformInfo::isVScaleKnownToBeAPowerOfTwo() const {
- return TTIImpl->isVScaleKnownToBeAPowerOfTwo();
-}
-
bool TargetTransformInfo::shouldMaximizeVectorBandwidth(
TargetTransformInfo::RegisterKind K) const {
return TTIImpl->shouldMaximizeVectorBandwidth(K);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3affb4de2d4b4..a58c08bd00041 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4757,11 +4757,9 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val,
Depth + 1);
case ISD::VSCALE:
- // vscale(power-of-two) is a power-of-two for some targets
- if (getTargetLoweringInfo().isVScaleKnownToBeAPowerOfTwo() &&
- isKnownToBeAPowerOfTwo(Val.getOperand(0), /*OrZero=*/false, Depth + 1))
- return true;
- break;
+ // vscale(power-of-two) is a power-of-two
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), /*OrZero=*/false,
+ Depth + 1);
}
// More could be done here, though the above checks are enough
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 6ecea4f6e2d5e..b1df977d43fcf 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -563,8 +563,6 @@ class AArch64TargetLowering : public TargetLowering {
SDValue Chain, SDValue InGlue, unsigned Condition,
bool InsertVectorLengthCheck = false) const;
- bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }
-
/// Returns true if \p RdxOp should be lowered to a SVE reduction. If a SVE2
/// pairwise operation can be used for the reduction \p PairwiseOpIID is set
/// to its intrinsic ID.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index e166e0cfdaafd..f247e9e49e23f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -165,8 +165,6 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
return ST->getVScaleForTuning();
}
- bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }
-
bool shouldMaximizeVectorBandwidth(
TargetTransformInfo::RegisterKind K) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 77512b609fba8..227abc9e80579 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -25714,18 +25714,6 @@ const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
}
-bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
- // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
- // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
- // a power of two as well.
- // FIXME: This doesn't work for zve32, but that's already broken
- // elsewhere for the same reason.
- assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
- static_assert(RISCV::RVVBitsPerBlock == 64,
- "RVVBitsPerBlock changed, audit needed");
- return true;
-}
-
bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index c4bb32802ec05..8d88aeb7ae3fc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -392,8 +392,6 @@ class RISCVTargetLowering : public TargetLowering {
unsigned uid,
MCContext &Ctx) const override;
- bool isVScaleKnownToBeAPowerOfTwo() const override;
-
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM, SelectionDAG &DAG) const;
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 9e9277f050e01..424f9fe52c59e 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -358,10 +358,6 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment) const override;
- bool isVScaleKnownToBeAPowerOfTwo() const override {
- return TLI->isVScaleKnownToBeAPowerOfTwo();
- }
-
/// \returns How the target needs this vector-predicated operation to be
/// transformed.
TargetTransformInfo::VPLegalization
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2342c8bfa502e..0fd425c23c7aa 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2383,21 +2383,8 @@ Value *EpilogueVectorizerMainLoop::createIterationCountCheck(
// check is known to be true, or known to be false.
CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
} // else step known to be < trip count, use CheckMinIters preset to false.
- } else if (VF.isScalable() && !TTI->isVScaleKnownToBeAPowerOfTwo() &&
- !isIndvarOverflowCheckKnownFalse(Cost, VF, UF) &&
- Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
- // vscale is not necessarily a power-of-2, which means we cannot guarantee
- // an overflow to zero when updating induction variables and so an
- // additional overflow check is required before entering the vector loop.
-
- // Get the maximum unsigned value for the type.
- Value *MaxUIntTripCount =
- ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask());
- Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count);
-
- // Don't execute the vector loop if (UMax - n) < (VF * UF).
- CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, CreateStep());
}
+
return CheckMinIters;
}
@@ -3663,7 +3650,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
MaxFactors.FixedVF.getFixedValue();
if (MaxFactors.ScalableVF) {
std::optional<unsigned> MaxVScale = getMaxVScale(*TheFunction, TTI);
- if (MaxVScale && TTI.isVScaleKnownToBeAPowerOfTwo()) {
+ if (MaxVScale) {
MaxPowerOf2RuntimeVF = std::max<unsigned>(
*MaxPowerOf2RuntimeVF,
*MaxVScale * MaxFactors.ScalableVF.getKnownMinValue());
@@ -8692,14 +8679,6 @@ void LoopVectorizationPlanner::attachRuntimeChecks(
void LoopVectorizationPlanner::addMinimumIterationCheck(
VPlan &Plan, ElementCount VF, unsigned UF,
ElementCount MinProfitableTripCount) const {
- // vscale is not necessarily a power-of-2, which means we cannot guarantee
- // an overflow to zero when updating induction variables and so an
- // additional overflow check is required before entering the vector loop.
- bool IsIndvarOverflowCheckNeededForVF =
- VF.isScalable() && !TTI.isVScaleKnownToBeAPowerOfTwo() &&
- !isIndvarOverflowCheckKnownFalse(&CM, VF, UF) &&
- CM.getTailFoldingStyle() !=
- TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
const uint32_t *BranchWeigths =
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())
? &MinItersBypassWeights[0]
@@ -8707,7 +8686,7 @@ void LoopVectorizationPlanner::addMinimumIterationCheck(
VPlanTransforms::addMinimumIterationCheck(
Plan, VF, UF, MinProfitableTripCount,
CM.requiresScalarEpilogue(VF.isVector()), CM.foldTailByMasking(),
- IsIndvarOverflowCheckNeededForVF, OrigLoop, BranchWeigths,
+ /*CheckNeededWithTailFolding=*/false, OrigLoop, BranchWeigths,
OrigLoop->getLoopPredecessor()->getTerminator()->getDebugLoc(), PSE);
}
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
deleted file mode 100644
index 65d3e7e7cbdf4..0000000000000
--- a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll
+++ /dev/null
@@ -1,114 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes=loop-vectorize -force-tail-folding-style=data -prefer-predicate-over-epilogue=predicate-dont-vectorize -force-target-supports-scalable-vectors -S < %s | FileCheck %s
-
-; vscale is not guaranteed to be a power of two, so this test (which
-; deliberately doesn't correspond to an in-tree backend since those
-; *do* have vscale as power-of-two) exercises the code required for the
-; minimum iteration check in the non-power-of-two case.
-
-define void @foo(i32 %val, ptr dereferenceable(1024) %ptr) {
-; CHECK-LABEL: @foo(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
-; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 -257, [[TMP7]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 256, [[TMP2]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]]
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
-; CHECK: while.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
-; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], 256
-; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK: while.end.loopexit:
-; CHECK-NEXT: ret void
-;
-entry:
- br label %while.body
-
-while.body: ; preds = %while.body, %entry
- %index = phi i64 [ %index.next, %while.body ], [ 0, %entry ]
- %gep = getelementptr i32, ptr %ptr, i64 %index
- %ld1 = load i32, ptr %gep, align 4
- %index.next = add nsw i64 %index, 1
- %cmp10 = icmp ult i64 %index.next, 256
- br i1 %cmp10, label %while.body, label %while.end.loopexit, !llvm.loop !0
-
-while.end.loopexit: ; preds = %while.body
- ret void
-}
-
-; Same as @foo, but with variable trip count.
-define void @foo2(i32 %val, ptr dereferenceable(1024) %ptr, i64 %n) {
-; CHECK-LABEL: @foo2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[UMAX]]
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
-; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
-; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP6]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
-; CHECK: while.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1
-; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]]
-; CHECK: while.end.loopexit:
-; CHECK-NEXT: ret void
-;
-entry:
- br label %while.body
-
-while.body: ; preds = %while.body, %entry
- %index = phi i64 [ %index.next, %while.body ], [ 0, %entry ]
- %gep = getelementptr i32, ptr %ptr, i64 %index
- %ld1 = load i32, ptr %gep, align 4
- %index.next = add nsw i64 %index, 1
- %cmp10 = icmp ult i64 %index.next, %n
- br i1 %cmp10, label %while.body, label %while.end.loopexit, !llvm.loop !0
-
-while.end.loopexit: ; preds = %while.body
- ret void
-}
-
-!0 = distinct !{!0, !1, !2, !3, !4}
-!1 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
-!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
-!3 = !{!"llvm.loop.interleave.count", i32 1}
-!4 = !{!"llvm.loop.vectorize.width", i32 4}
|
| Plan, VF, UF, MinProfitableTripCount, | ||
| CM.requiresScalarEpilogue(VF.isVector()), CM.foldTailByMasking(), | ||
| IsIndvarOverflowCheckNeededForVF, OrigLoop, BranchWeigths, | ||
| /*CheckNeededWithTailFolding=*/false, OrigLoop, BranchWeigths, |
There was a problem hiding this comment.
I suspect the CheckNeededWithTailFolding parameter can be removed, but I'd rather do that as a separate PR if that's agreeable.
| Plan, VF, UF, MinProfitableTripCount, | ||
| CM.requiresScalarEpilogue(VF.isVector()), CM.foldTailByMasking(), | ||
| IsIndvarOverflowCheckNeededForVF, OrigLoop, BranchWeigths, | ||
| /*CheckNeededWithTailFolding=*/false, OrigLoop, BranchWeigths, |
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/41830 Here is the relevant piece of the build log for reference: |
…heck Previously, the canonical IV increment may have overflowed to a non-zero value due to vscale being a non power-of-two. So we used to emit a runtime check for this. If you didn't want the runtime check, DataAndControlFlowWithoutRuntimeCheck skipped it and instead tweaked the trip count so it wouldn't overflow. However llvm#144963 stopped the check from ever being emitted (and in llvm#183292 the code to emit the check was removed), but we never restored the trip count back to normal now that it was no longer needed. This PR restores the trip count since we don't need to adjust it. A follow up NFC can then remove DataAndControlFlowWithoutRuntimeCheck.
Stacked on llvm#183729 After llvm#144963 and llvm#183292 we never emit the runtime check, so DataAndControlFlowWithoutRuntimeCheck is equivalent to DataAndControlFlow. With that we only need to store one tail folding style instead of two, because we don't need to distinguish whether or not the IV update overflows (to a non-zero value)
…heck (#183729) Previously, the canonical IV increment may have overflowed to a non-zero value due to vscale being a non power-of-two. So we used to emit a runtime check for this. If you didn't want the runtime check, DataAndControlFlowWithoutRuntimeCheck skipped it and instead tweaked the trip count so it wouldn't overflow. However #144963 stopped the check from ever being emitted because vscale is always a power-of-two on AArch64 and RISC-V, so it never overflowed to a non-zero value. And in #183292 the code to emit the check was removed. But we never restored the trip count back to normal when the target's vscale was a power-of-two. Now that vscale is always a power-of-two, this PR avoids adjusting it. A follow up NFC can then remove DataAndControlFlowWithoutRuntimeCheck.
Stacked on llvm#183729 After llvm#144963 and llvm#183292 we never emit the runtime check, so DataAndControlFlowWithoutRuntimeCheck is equivalent to DataAndControlFlow. With that we only need to store one tail folding style instead of two, because we don't need to distinguish whether or not the IV update overflows (to a non-zero value)
After #144963 and #183292 we never emit the runtime check, so DataAndControlFlowWithoutRuntimeCheck is equivalent to DataAndControlFlow. With that we only need to store one tail folding style instead of two, because we don't need to distinguish whether or not the IV update overflows (to a non-zero value)
After llvm#144963 and llvm#183292 we never emit the runtime check, so DataAndControlFlowWithoutRuntimeCheck is equivalent to DataAndControlFlow. With that we only need to store one tail folding style instead of two, because we don't need to distinguish whether or not the IV update overflows (to a non-zero value)
…heck (llvm#183729) Previously, the canonical IV increment may have overflowed to a non-zero value due to vscale being a non power-of-two. So we used to emit a runtime check for this. If you didn't want the runtime check, DataAndControlFlowWithoutRuntimeCheck skipped it and instead tweaked the trip count so it wouldn't overflow. However llvm#144963 stopped the check from ever being emitted because vscale is always a power-of-two on AArch64 and RISC-V, so it never overflowed to a non-zero value. And in llvm#183292 the code to emit the check was removed. But we never restored the trip count back to normal when the target's vscale was a power-of-two. Now that vscale is always a power-of-two, this PR avoids adjusting it. A follow up NFC can then remove DataAndControlFlowWithoutRuntimeCheck.
After llvm#144963 and llvm#183292 we never emit the runtime check, so DataAndControlFlowWithoutRuntimeCheck is equivalent to DataAndControlFlow. With that we only need to store one tail folding style instead of two, because we don't need to distinguish whether or not the IV update overflows (to a non-zero value)
After llvm#183080 this is no longer a configurable property. NOTE: No test changes are expected beyond llvm/test/Transforms/LoopVectorize/scalable-predication.ll, which has been removed because it only existed to verify the now-unsupported functionality.
…heck (llvm#183729) Previously, the canonical IV increment may have overflowed to a non-zero value due to vscale being a non power-of-two. So we used to emit a runtime check for this. If you didn't want the runtime check, DataAndControlFlowWithoutRuntimeCheck skipped it and instead tweaked the trip count so it wouldn't overflow. However llvm#144963 stopped the check from ever being emitted because vscale is always a power-of-two on AArch64 and RISC-V, so it never overflowed to a non-zero value. And in llvm#183292 the code to emit the check was removed. But we never restored the trip count back to normal when the target's vscale was a power-of-two. Now that vscale is always a power-of-two, this PR avoids adjusting it. A follow up NFC can then remove DataAndControlFlowWithoutRuntimeCheck.
After llvm#144963 and llvm#183292 we never emit the runtime check, so DataAndControlFlowWithoutRuntimeCheck is equivalent to DataAndControlFlow. With that we only need to store one tail folding style instead of two, because we don't need to distinguish whether or not the IV update overflows (to a non-zero value)
After #183080 this is no longer a configurable property.
NOTE: No test changes are expected beyond llvm/test/Transforms/LoopVectorize/scalable-predication.ll, which has been removed because it only existed to verify the now-unsupported functionality.