diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 84564563de8e3..4cec8115709d6 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -85,11 +85,13 @@ LLVM_ABI bool isDereferenceableAndAlignedInLoop(
     AssumptionCache *AC = nullptr,
     SmallVectorImpl *Predicates = nullptr);
 
-/// Return true if the loop \p L cannot fault on any iteration and only
-/// contains read-only memory accesses.
-LLVM_ABI bool isDereferenceableReadOnlyLoop(
-    Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
-    SmallVectorImpl *Predicates = nullptr);
+/// Returns true if the loop contains read-only memory accesses and doesn't
+/// throw. Puts loads that may fault into \p NonDereferenceableAndAlignedLoads.
+LLVM_ABI bool
+isReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
+               AssumptionCache *AC,
+               SmallVectorImpl &NonDereferenceableAndAlignedLoads,
+               SmallVectorImpl *Predicates = nullptr);
 
 /// Return true if we know that executing a load from this value cannot trap.
 ///
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 43ff084816d18..4eb17406f377e 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -445,6 +445,12 @@ class LoopVectorizationLegality {
   /// Returns a list of all known histogram operations in the loop.
   bool hasHistograms() const { return !Histograms.empty(); }
 
+  /// Returns potentially faulting loads.
+  const SmallPtrSetImpl &
+  getPotentiallyFaultingLoads() const {
+    return PotentiallyFaultingLoads;
+  }
+
   PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
     return &PSE;
   }
@@ -630,6 +636,9 @@ class LoopVectorizationLegality {
   /// may work on the same memory location.
   SmallVector Histograms;
 
+  /// Hold potentially faulting loads.
+  SmallPtrSet PotentiallyFaultingLoads;
+
   /// BFI and PSI are used to check for profile guided size optimizations.
   BlockFrequencyInfo *BFI;
   ProfileSummaryInfo *PSI;
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 78d0887d5d87e..d518eb6f90a70 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -856,16 +856,19 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,
   return isPointerAlwaysReplaceable(From, To, DL);
 }
 
-bool llvm::isDereferenceableReadOnlyLoop(
+bool llvm::isReadOnlyLoop(
     Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+    SmallVectorImpl &NonDereferenceableAndAlignedLoads,
     SmallVectorImpl *Predicates) {
   for (BasicBlock *BB : L->blocks()) {
     for (Instruction &I : *BB) {
       if (auto *LI = dyn_cast(&I)) {
         if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
-          return false;
-      } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
+          NonDereferenceableAndAlignedLoads.push_back(LI);
+      } else if (I.mayReadFromMemory() || I.mayWriteToMemory() ||
+                 I.mayThrow()) {
         return false;
+      }
     }
   }
   return true;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index c47fd9421fddd..cad1641cd5d87 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1760,16 +1760,31 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
          "Expected latch predecessor to be the early exiting block");
 
-  // TODO: Handle loops that may fault.
   Predicates.clear();
-  if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
-                                     &Predicates)) {
-    reportVectorizationFailure(
-        "Loop may fault",
-        "Cannot vectorize potentially faulting early exit loop",
-        "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+  SmallVector NonDerefLoads;
+  if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
+                      &Predicates)) {
+    reportVectorizationFailure("Loop may fault",
+                               "Cannot vectorize non-read-only early exit loop",
+                               "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
     return false;
   }
+  // Check non-dereferenceable loads if any.
+  for (LoadInst *LI : NonDerefLoads) {
+    // Only support unit-stride access for now.
+    int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
+    if (Stride != 1) {
+      reportVectorizationFailure(
+          "Loop contains potentially faulting strided load",
+          "Cannot vectorize early exit loop with "
+          "strided fault-only-first load",
+          "EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
+      return false;
+    }
+    PotentiallyFaultingLoads.insert(LI);
+    LLVM_DEBUG(dbgs() << "LV: Found potentially faulting load: " << *LI
+                      << "\n");
+  }
 
   [[maybe_unused]] const SCEV *SymbolicMaxBTC =
       PSE.getSymbolicMaxBackedgeTakenCount();
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9667b506e594f..0c82f48405101 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10041,6 +10041,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     return false;
   }
 
+  if (!LVL.getPotentiallyFaultingLoads().empty()) {
+    reportVectorizationFailure("Auto-vectorization of loops with potentially "
+                               "faulting load is not supported",
+                               "PotentiallyFaultingLoadsNotSupported", ORE, L);
+    return false;
+  }
+
   // Entrance to the VPlan-native vectorization path. Outer loops are processed
   // here. They may require CFG and instruction level transformations before
   // even evaluating whether vectorization is profitable. Since we cannot modify
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
index e9b8e8cdda526..6954d04f53f04 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
@@ -208,7 +208,7 @@ loop.end:
 
 define i64 @same_exit_block_pre_inc_use1_too_small_allocas() {
 ; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_allocas'
-; CHECK: LV: Not vectorizing: Loop may fault.
+; CHECK: LV: Not vectorizing: Auto-vectorization of loops with potentially faulting load is not supported.
 entry:
   %p1 = alloca [42 x i8]
   %p2 = alloca [42 x i8]
@@ -238,7 +238,7 @@ loop.end:
 
 define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) {
 ; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_deref_ptrs'
-; CHECK: LV: Not vectorizing: Loop may fault.
+; CHECK: LV: Not vectorizing: Auto-vectorization of loops with potentially faulting load is not supported.
 entry:
   br label %loop
 
@@ -264,7 +264,7 @@ loop.end:
 
 define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) {
 ; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_unknown_ptrs'
-; CHECK: LV: Not vectorizing: Loop may fault.
+; CHECK: LV: Not vectorizing: Auto-vectorization of loops with potentially faulting load is not supported.
 entry:
   br label %loop
 
@@ -287,6 +287,32 @@ loop.end:
   ret i64 %retval
 }
 
+define ptr @same_exit_block_strided_unknown_ptr(ptr %first, ptr %last, i32 %value) {
+; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_strided_unknown_ptr'
+; CHECK: LV: Not vectorizing: Loop contains potentially faulting strided load.
+entry:
+  %cond = icmp eq ptr %first, %last
+  br i1 %cond, label %return, label %for.body
+
+for.body:
+  %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
+  %1 = load i32, ptr %first.addr, align 4
+  %cond2 = icmp eq i32 %1, %value
+  br i1 %cond2, label %for.end, label %for.inc
+
+for.inc:
+  %first.next = getelementptr inbounds i32, ptr %first.addr, i64 2
+  %cond3 = icmp eq ptr %first.next, %last
+  br i1 %cond3, label %for.end, label %for.body
+
+for.end:
+  %retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ]
+  br label %return
+
+return:
+  %retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ]
+  ret ptr %retval
+}
 
 ; The early exit (i.e. unknown exit-not-taken count) is the latch - we don't
 ; support this yet.
diff --git a/llvm/unittests/Analysis/LoadsTest.cpp b/llvm/unittests/Analysis/LoadsTest.cpp
index c4f5b22318e34..815e200899d1c 100644
--- a/llvm/unittests/Analysis/LoadsTest.cpp
+++ b/llvm/unittests/Analysis/LoadsTest.cpp
@@ -120,7 +120,7 @@ define void @f(i32* %p1, i32* %p2, i64 %i) {
   EXPECT_TRUE(canReplacePointersInUseIfEqual(IcmpUse, P2, DL));
 }
 
-TEST(LoadsTest, IsDerefReadOnlyLoop) {
+TEST(LoadsTest, IsReadOnlyLoop) {
   LLVMContext C;
   std::unique_ptr M = parseIR(C,
                               R"IR(
@@ -183,7 +183,8 @@ loop.end:
   TargetLibraryInfoImpl TLII(M->getTargetTriple());
   TargetLibraryInfo TLI(TLII);
 
-  auto IsDerefReadOnlyLoop = [&TLI](Function *F) -> bool {
+  auto IsReadOnlyLoop =
+      [&TLI](Function *F, SmallVector &NonDerefLoads) -> bool {
     AssumptionCache AC(*F);
     DominatorTree DT(*F);
     LoopInfo LI(DT);
@@ -195,9 +196,13 @@ loop.end:
     assert(Header->getName() == "loop");
     Loop *L = LI.getLoopFor(Header);
 
-    return isDereferenceableReadOnlyLoop(L, &SE, &DT, &AC);
+    return isReadOnlyLoop(L, &SE, &DT, &AC, NonDerefLoads);
   };
 
-  ASSERT_TRUE(IsDerefReadOnlyLoop(F1));
-  ASSERT_FALSE(IsDerefReadOnlyLoop(F2));
+  SmallVector NonDerefLoads;
+  ASSERT_TRUE(IsReadOnlyLoop(F1, NonDerefLoads));
+  ASSERT_TRUE(NonDerefLoads.empty());
+  ASSERT_TRUE(IsReadOnlyLoop(F2, NonDerefLoads));
+  ASSERT_TRUE((NonDerefLoads.size() == 1) &&
+              (NonDerefLoads[0]->getName() == "ld1"));
 }