diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index a7fb0efedadde..18ae6a005d972 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1358,9 +1358,6 @@ class TargetTransformInfo { /// \return the value of vscale to tune the cost model for. LLVM_ABI std::optional getVScaleForTuning() const; - /// \return true if vscale is known to be a power of 2 - LLVM_ABI bool isVScaleKnownToBeAPowerOfTwo() const; - /// \return True if the vectorization factor should be chosen to /// make the vector of the smallest element type match the size of a /// vector register. For wider element types, this could result in diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 454be56aed6cc..e062b70be6b59 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -644,7 +644,6 @@ class TargetTransformInfoImplBase { virtual std::optional getVScaleForTuning() const { return std::nullopt; } - virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; } virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const { diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 68874c59be4b8..6dcb6f0062a08 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -889,7 +889,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { std::optional getVScaleForTuning() const override { return std::nullopt; } - bool isVScaleKnownToBeAPowerOfTwo() const override { return false; } /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the demanded result elements need to be inserted and/or diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 7964bfd81d704..4b60c3f905120 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -623,9 +623,6 @@ class LLVM_ABI TargetLoweringBase { return BypassSlowDivWidths; } - /// Return true only if vscale must be a power of two. - virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; } - /// Return true if Flow Control is an expensive operation that should be /// avoided. bool isJumpExpensive() const { return JumpIsExpensive; } diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 0e745a978656b..0f97edc424d7e 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -837,10 +837,6 @@ std::optional TargetTransformInfo::getVScaleForTuning() const { return TTIImpl->getVScaleForTuning(); } -bool TargetTransformInfo::isVScaleKnownToBeAPowerOfTwo() const { - return TTIImpl->isVScaleKnownToBeAPowerOfTwo(); -} - bool TargetTransformInfo::shouldMaximizeVectorBandwidth( TargetTransformInfo::RegisterKind K) const { return TTIImpl->shouldMaximizeVectorBandwidth(K); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 3affb4de2d4b4..a58c08bd00041 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4757,11 +4757,9 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, Depth + 1); case ISD::VSCALE: - // vscale(power-of-two) is a power-of-two for some targets - if (getTargetLoweringInfo().isVScaleKnownToBeAPowerOfTwo() && - isKnownToBeAPowerOfTwo(Val.getOperand(0), /*OrZero=*/false, Depth + 1)) - return true; - break; + // vscale(power-of-two) is a power-of-two + return isKnownToBeAPowerOfTwo(Val.getOperand(0), /*OrZero=*/false, + Depth + 1); } // More could be done here, though the above checks are enough diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 6ecea4f6e2d5e..b1df977d43fcf 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -563,8 +563,6 @@ class AArch64TargetLowering : public TargetLowering { SDValue Chain, SDValue InGlue, unsigned Condition, bool InsertVectorLengthCheck = false) const; - bool isVScaleKnownToBeAPowerOfTwo() const override { return true; } - /// Returns true if \p RdxOp should be lowered to a SVE reduction. If a SVE2 /// pairwise operation can be used for the reduction \p PairwiseOpIID is set /// to its intrinsic ID. diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index e166e0cfdaafd..f247e9e49e23f 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -165,8 +165,6 @@ class AArch64TTIImpl final : public BasicTTIImplBase { return ST->getVScaleForTuning(); } - bool isVScaleKnownToBeAPowerOfTwo() const override { return true; } - bool shouldMaximizeVectorBandwidth( TargetTransformInfo::RegisterKind K) const override; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 77512b609fba8..227abc9e80579 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -25714,18 +25714,6 @@ const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry( return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx); } -bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const { - // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power - // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be - // a power of two as well. - // FIXME: This doesn't work for zve32, but that's already broken - // elsewhere for the same reason. - assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported"); - static_assert(RISCV::RVVBitsPerBlock == 64, - "RVVBitsPerBlock changed, audit needed"); - return true; -} - bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index c4bb32802ec05..8d88aeb7ae3fc 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -392,8 +392,6 @@ class RISCVTargetLowering : public TargetLowering { unsigned uid, MCContext &Ctx) const override; - bool isVScaleKnownToBeAPowerOfTwo() const override; - bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const; bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 9e9277f050e01..424f9fe52c59e 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -358,10 +358,6 @@ class RISCVTTIImpl final : public BasicTTIImplBase { bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment) const override; - bool isVScaleKnownToBeAPowerOfTwo() const override { - return TLI->isVScaleKnownToBeAPowerOfTwo(); - } - /// \returns How the target needs this vector-predicated operation to be /// transformed. TargetTransformInfo::VPLegalization diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2342c8bfa502e..0fd425c23c7aa 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2383,21 +2383,8 @@ Value *EpilogueVectorizerMainLoop::createIterationCountCheck( // check is known to be true, or known to be false. CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check"); } // else step known to be < trip count, use CheckMinIters preset to false. - } else if (VF.isScalable() && !TTI->isVScaleKnownToBeAPowerOfTwo() && - !isIndvarOverflowCheckKnownFalse(Cost, VF, UF) && - Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) { - // vscale is not necessarily a power-of-2, which means we cannot guarantee - // an overflow to zero when updating induction variables and so an - // additional overflow check is required before entering the vector loop. - - // Get the maximum unsigned value for the type. - Value *MaxUIntTripCount = - ConstantInt::get(CountTy, cast(CountTy)->getMask()); - Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count); - - // Don't execute the vector loop if (UMax - n) < (VF * UF). - CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, CreateStep()); } + return CheckMinIters; } @@ -3663,7 +3650,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { MaxFactors.FixedVF.getFixedValue(); if (MaxFactors.ScalableVF) { std::optional MaxVScale = getMaxVScale(*TheFunction, TTI); - if (MaxVScale && TTI.isVScaleKnownToBeAPowerOfTwo()) { + if (MaxVScale) { MaxPowerOf2RuntimeVF = std::max( *MaxPowerOf2RuntimeVF, *MaxVScale * MaxFactors.ScalableVF.getKnownMinValue()); @@ -8692,14 +8679,6 @@ void LoopVectorizationPlanner::attachRuntimeChecks( void LoopVectorizationPlanner::addMinimumIterationCheck( VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const { - // vscale is not necessarily a power-of-2, which means we cannot guarantee - // an overflow to zero when updating induction variables and so an - // additional overflow check is required before entering the vector loop. - bool IsIndvarOverflowCheckNeededForVF = - VF.isScalable() && !TTI.isVScaleKnownToBeAPowerOfTwo() && - !isIndvarOverflowCheckKnownFalse(&CM, VF, UF) && - CM.getTailFoldingStyle() != - TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck; const uint32_t *BranchWeigths = hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()) ? &MinItersBypassWeights[0] @@ -8707,7 +8686,7 @@ void LoopVectorizationPlanner::addMinimumIterationCheck( VPlanTransforms::addMinimumIterationCheck( Plan, VF, UF, MinProfitableTripCount, CM.requiresScalarEpilogue(VF.isVector()), CM.foldTailByMasking(), - IsIndvarOverflowCheckNeededForVF, OrigLoop, BranchWeigths, + /*CheckNeededWithTailFolding=*/false, OrigLoop, BranchWeigths, OrigLoop->getLoopPredecessor()->getTerminator()->getDebugLoc(), PSE); } diff --git a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll deleted file mode 100644 index 65d3e7e7cbdf4..0000000000000 --- a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll +++ /dev/null @@ -1,114 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=loop-vectorize -force-tail-folding-style=data -prefer-predicate-over-epilogue=predicate-dont-vectorize -force-target-supports-scalable-vectors -S < %s | FileCheck %s - -; vscale is not guaranteed to be a power of two, so this test (which -; deliberately doesn't correspond to an in-tree backend since those -; *do* have vscale as power-of-two) exercises the code required for the -; minimum iteration check in the non-power-of-two case. - -define void @foo(i32 %val, ptr dereferenceable(1024) %ptr) { -; CHECK-LABEL: @foo( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2 -; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 -257, [[TMP7]] -; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 256, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: ret void -; -entry: - br label %while.body - -while.body: ; preds = %while.body, %entry - %index = phi i64 [ %index.next, %while.body ], [ 0, %entry ] - %gep = getelementptr i32, ptr %ptr, i64 %index - %ld1 = load i32, ptr %gep, align 4 - %index.next = add nsw i64 %index, 1 - %cmp10 = icmp ult i64 %index.next, 256 - br i1 %cmp10, label %while.body, label %while.end.loopexit, !llvm.loop !0 - -while.end.loopexit: ; preds = %while.body - ret void -} - -; Same as @foo, but with variable trip count. -define void @foo2(i32 %val, ptr dereferenceable(1024) %ptr, i64 %n) { -; CHECK-LABEL: @foo2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 -1, [[UMAX]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] -; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX]], [[TMP6]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: ret void -; -entry: - br label %while.body - -while.body: ; preds = %while.body, %entry - %index = phi i64 [ %index.next, %while.body ], [ 0, %entry ] - %gep = getelementptr i32, ptr %ptr, i64 %index - %ld1 = load i32, ptr %gep, align 4 - %index.next = add nsw i64 %index, 1 - %cmp10 = icmp ult i64 %index.next, %n - br i1 %cmp10, label %while.body, label %while.end.loopexit, !llvm.loop !0 - -while.end.loopexit: ; preds = %while.body - ret void -} - -!0 = distinct !{!0, !1, !2, !3, !4} -!1 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} -!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} -!3 = !{!"llvm.loop.interleave.count", i32 1} -!4 = !{!"llvm.loop.vectorize.width", i32 4}