diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index cab70c5c01a45..c3c53f783dbed 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -677,7 +677,7 @@ class AccessAnalysis {
                             const DenseMap<Value *, const SCEV *> &Strides,
                             DenseMap<std::pair<const SCEV *, Type *>, unsigned> &DepSetId,
                             Loop *TheLoop, unsigned &RunningDepId,
-                            unsigned ASId, bool ShouldCheckStride, bool Assume);
+                            unsigned ASId, bool Assume);
 
   /// Check whether we can check the pointers at runtime for
   /// non-intersection.
@@ -685,8 +685,9 @@ class AccessAnalysis {
   /// Returns true if we need no check or if we do and we can generate them
   /// (i.e. the pointers have computable bounds).
   bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
-                       Loop *TheLoop, const DenseMap<Value *, const SCEV *> &Strides,
-                       Value *&UncomputablePtr, bool ShouldCheckWrap = false);
+                       Loop *TheLoop,
+                       const DenseMap<Value *, const SCEV *> &Strides,
+                       Value *&UncomputablePtr);
 
   /// Goes over all memory accesses, checks whether a RT check is needed
   /// and builds sets of dependent accesses.
@@ -1115,13 +1116,11 @@ findForkedPointer(PredicatedScalarEvolution &PSE,
   return {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}};
 }
 
-bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
-                                          MemAccessInfo Access, Type *AccessTy,
-                                          const DenseMap<Value *, const SCEV *> &StridesMap,
-                                          DenseMap<std::pair<const SCEV *, Type *>, unsigned> &DepSetId,
-                                          Loop *TheLoop, unsigned &RunningDepId,
-                                          unsigned ASId, bool ShouldCheckWrap,
-                                          bool Assume) {
+bool AccessAnalysis::createCheckForAccess(
+    RuntimePointerChecking &RtCheck, MemAccessInfo Access, Type *AccessTy,
+    const DenseMap<Value *, const SCEV *> &StridesMap,
+    DenseMap<std::pair<const SCEV *, Type *>, unsigned> &DepSetId, Loop *TheLoop,
+    unsigned &RunningDepId, unsigned ASId, bool Assume) {
   Value *Ptr = Access.getPointer();
 
   SmallVector<PointerIntPair<const SCEV *, 1, bool>> TranslatedPtrs =
@@ -1152,8 +1151,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
 
     // When we run after a failing dependency check we have to make sure
    // we don't have wrapping pointers.
-    if (ShouldCheckWrap &&
-        !isNoWrap(PSE, AR, TranslatedPtrs.size() == 1 ? Ptr : nullptr, AccessTy,
+    if (!isNoWrap(PSE, AR, TranslatedPtrs.size() == 1 ? Ptr : nullptr, AccessTy,
                   TheLoop, Assume)) {
       return false;
     }
@@ -1182,10 +1180,10 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
   return true;
 }
 
-bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
-                                     ScalarEvolution *SE, Loop *TheLoop,
-                                     const DenseMap<Value *, const SCEV *> &StridesMap,
-                                     Value *&UncomputablePtr, bool ShouldCheckWrap) {
+bool AccessAnalysis::canCheckPtrAtRT(
+    RuntimePointerChecking &RtCheck, ScalarEvolution *SE, Loop *TheLoop,
+    const DenseMap<Value *, const SCEV *> &StridesMap,
+    Value *&UncomputablePtr) {
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
   bool CanDoRT = true;
@@ -1245,7 +1243,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
     for (const auto &AccessTy : Accesses[Access]) {
       if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
                                 DepSetId, TheLoop, RunningDepId, ASId,
-                                ShouldCheckWrap, false)) {
+                                false)) {
         LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
                           << *Access.getPointer() << '\n');
         Retries.emplace_back(Access, AccessTy);
@@ -1275,7 +1273,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
     for (const auto &[Access, AccessTy] : Retries) {
       if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
                                 DepSetId, TheLoop, RunningDepId, ASId,
-                                ShouldCheckWrap, /*Assume=*/true)) {
+                                /*Assume=*/true)) {
         CanDoAliasSetRT = false;
         UncomputablePtr = Access.getPointer();
         break;
@@ -2643,9 +2641,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
   Value *UncomputablePtr = nullptr;
-  bool CanDoRTIfNeeded =
-      Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop,
-                               SymbolicStrides, UncomputablePtr, false);
+  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
+      *PtrRtChecking, PSE->getSE(), TheLoop, SymbolicStrides, UncomputablePtr);
   if (!CanDoRTIfNeeded) {
     const auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
     recordAnalysis("CantIdentifyArrayBounds", I)
@@ -2676,7 +2673,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
       auto *SE = PSE->getSE();
       UncomputablePtr = nullptr;
       CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
-          *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true);
+          *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr);
 
       // Check that we found the bounds for the pointer.
       if (!CanDoRTIfNeeded) {
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll b/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll
index 5234d8f107271..d4f7f82a8cff1 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/nusw-predicates.ll
@@ -15,6 +15,12 @@ define void @int_and_pointer_predicate(ptr %v, i32 %N) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    Run-time memory checks:
 ; CHECK-NEXT:    Grouped accesses:
+; CHECK-NEXT:      Group [[GRP1:0x[0-9a-f]+]]:
+; CHECK-NEXT:        (Low: %v High: (2 + %v))
+; CHECK-NEXT:          Member: %v
+; CHECK-NEXT:      Group [[GRP2:0x[0-9a-f]+]]:
+; CHECK-NEXT:        (Low: %v High: (6 + (4 * (trunc i32 %N to i16)) + %v))
+; CHECK-NEXT:          Member: {%v,+,4}<%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:    SCEV assumptions:
@@ -57,36 +63,36 @@ define void @int_and_multiple_pointer_predicates(ptr %v, ptr %w, i32 %N) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    Run-time memory checks:
 ; CHECK-NEXT:    Check 0:
-; CHECK-NEXT:      Comparing group ([[GRP1:0x[0-9a-f]+]]):
+; CHECK-NEXT:      Comparing group ([[GRP3:0x[0-9a-f]+]]):
 ; CHECK-NEXT:        ptr %v
-; CHECK-NEXT:      Against group ([[GRP2:0x[0-9a-f]+]]):
+; CHECK-NEXT:      Against group ([[GRP4:0x[0-9a-f]+]]):
 ; CHECK-NEXT:        ptr %w
 ; CHECK-NEXT:    Check 1:
-; CHECK-NEXT:      Comparing group ([[GRP1]]):
+; CHECK-NEXT:      Comparing group ([[GRP3]]):
 ; CHECK-NEXT:        ptr %v
-; CHECK-NEXT:      Against group ([[GRP3:0x[0-9a-f]+]]):
+; CHECK-NEXT:      Against group ([[GRP5:0x[0-9a-f]+]]):
 ; CHECK-NEXT:        %gep.w = getelementptr { i16, i16 }, ptr %w, i16 %iv.i16
 ; CHECK-NEXT:    Check 2:
-; CHECK-NEXT:      Comparing group ([[GRP4:0x[0-9a-f]+]]):
+; CHECK-NEXT:      Comparing group ([[GRP6:0x[0-9a-f]+]]):
 ; CHECK-NEXT:        %gep.v = getelementptr { i16, i16 }, ptr %v, i16 %iv.i16
-; CHECK-NEXT:      Against group ([[GRP2]]):
+; CHECK-NEXT:      Against group ([[GRP4]]):
 ; CHECK-NEXT:        ptr %w
 ; CHECK-NEXT:    Check 3:
-; CHECK-NEXT:      Comparing group ([[GRP4]]):
+; CHECK-NEXT:      Comparing group ([[GRP6]]):
 ; CHECK-NEXT:        %gep.v = getelementptr { i16, i16 }, ptr %v, i16 %iv.i16
-; CHECK-NEXT:      Against group ([[GRP3]]):
+; CHECK-NEXT:      Against group ([[GRP5]]):
 ; CHECK-NEXT:        %gep.w = getelementptr { i16, i16 }, ptr %w, i16 %iv.i16
 ; CHECK-NEXT:    Grouped accesses:
-; CHECK-NEXT:      Group [[GRP1]]:
+; CHECK-NEXT:      Group [[GRP3]]:
 ; CHECK-NEXT:        (Low: %v High: (2 + %v))
 ; CHECK-NEXT:          Member: %v
-; CHECK-NEXT:      Group [[GRP4]]:
+; CHECK-NEXT:      Group [[GRP6]]:
 ; CHECK-NEXT:        (Low: %v High: (6 + (4 * (trunc i32 %N to i16)) + %v))
 ; CHECK-NEXT:          Member: {%v,+,4}<%loop>
-; CHECK-NEXT:      Group [[GRP2]]:
+; CHECK-NEXT:      Group [[GRP4]]:
 ; CHECK-NEXT:        (Low: %w High: (2 + %w))
 ; CHECK-NEXT:          Member: %w
-; CHECK-NEXT:      Group [[GRP3]]:
+; CHECK-NEXT:      Group [[GRP5]]:
 ; CHECK-NEXT:        (Low: %w High: (6 + (4 * (trunc i32 %N to i16)) + %w))
 ; CHECK-NEXT:          Member: {%w,+,4}<%loop>
 ; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll
index 38b7389ae9083..021447d53f943 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll
@@ -163,7 +163,7 @@ exit:
 define void @dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_may_wrap_1(ptr %a, ptr %b, ptr %c, i64 %offset, i64 %n) {
 ; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_may_wrap_1'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: cannot check memory dependencies at runtime
+; CHECK-NEXT:      Report: cannot identify array bounds
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Grouped accesses:
@@ -204,7 +204,7 @@ exit:
 define void @dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_may_wrap_2(ptr %a, ptr %b, ptr %c, i64 %offset, i64 %n) {
 ; CHECK-LABEL: 'dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_may_wrap_2'
 ; CHECK-NEXT:    loop:
-; CHECK-NEXT:      Report: cannot check memory dependencies at runtime
+; CHECK-NEXT:      Report: cannot identify array bounds
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:      Run-time memory checks:
 ; CHECK-NEXT:      Grouped accesses:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
index 26c571b9cb63a..a15253a901488 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
@@ -72,27 +72,27 @@ define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_st
 ; CHECK-NEXT:      Comparing group ([[GRP4:0x[0-9a-f]+]]):
 ; CHECK-NEXT:        %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
 ; CHECK-NEXT:      Against group ([[GRP5:0x[0-9a-f]+]]):
-; CHECK-NEXT:        %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT:        %gep.b = getelementptr i8, ptr %b, i64 %iv2
 ; CHECK-NEXT:    Check 1:
 ; CHECK-NEXT:      Comparing group ([[GRP4]]):
 ; CHECK-NEXT:        %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
 ; CHECK-NEXT:      Against group ([[GRP6:0x[0-9a-f]+]]):
-; CHECK-NEXT:        %gep.b = getelementptr i8, ptr %b, i64 %iv2
+; CHECK-NEXT:        %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
 ; CHECK-NEXT:    Check 2:
 ; CHECK-NEXT:      Comparing group ([[GRP5]]):
-; CHECK-NEXT:        %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
-; CHECK-NEXT:      Against group ([[GRP6]]):
 ; CHECK-NEXT:        %gep.b = getelementptr i8, ptr %b, i64 %iv2
+; CHECK-NEXT:      Against group ([[GRP6]]):
+; CHECK-NEXT:        %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
 ; CHECK-NEXT:    Grouped accesses:
 ; CHECK-NEXT:      Group [[GRP4]]:
 ; CHECK-NEXT:        (Low: %a High: ((4 * %n) + %a))
 ; CHECK-NEXT:          Member: {%a,+,4}<%loop>
 ; CHECK-NEXT:      Group [[GRP5]]:
-; CHECK-NEXT:        (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
-; CHECK-NEXT:          Member: {((4 * %offset) + %a),+,4}<%loop>
-; CHECK-NEXT:      Group [[GRP6]]:
 ; CHECK-NEXT:        (Low: %b High: (-1 + (5 * %n) + %b))
 ; CHECK-NEXT:          Member: {%b,+,5}<%loop>
+; CHECK-NEXT:      Group [[GRP6]]:
+; CHECK-NEXT:        (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
+; CHECK-NEXT:          Member: {((4 * %offset) + %a),+,4}<%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:    SCEV assumptions:
@@ -265,27 +265,27 @@ define void @dependency_check_and_runtime_checks_needed_gepb_may_wrap(ptr %a, pt
 ; CHECK-NEXT:      Comparing group ([[GRP13:0x[0-9a-f]+]]):
 ; CHECK-NEXT:        %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
 ; CHECK-NEXT:      Against group ([[GRP14:0x[0-9a-f]+]]):
-; CHECK-NEXT:        %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
+; CHECK-NEXT:        %gep.b = getelementptr float, ptr %b, i64 %iv2
 ; CHECK-NEXT:    Check 1:
 ; CHECK-NEXT:      Comparing group ([[GRP13]]):
 ; CHECK-NEXT:        %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv
 ; CHECK-NEXT:      Against group ([[GRP15:0x[0-9a-f]+]]):
-; CHECK-NEXT:        %gep.b = getelementptr float, ptr %b, i64 %iv2
+; CHECK-NEXT:        %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
 ; CHECK-NEXT:    Check 2:
 ; CHECK-NEXT:      Comparing group ([[GRP14]]):
-; CHECK-NEXT:        %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
-; CHECK-NEXT:      Against group ([[GRP15]]):
 ; CHECK-NEXT:        %gep.b = getelementptr float, ptr %b, i64 %iv2
+; CHECK-NEXT:      Against group ([[GRP15]]):
+; CHECK-NEXT:        %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
 ; CHECK-NEXT:    Grouped accesses:
 ; CHECK-NEXT:      Group [[GRP13]]:
 ; CHECK-NEXT:        (Low: %a High: ((4 * %n) + %a))
 ; CHECK-NEXT:          Member: {%a,+,4}<%loop>
 ; CHECK-NEXT:      Group [[GRP14]]:
-; CHECK-NEXT:        (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
-; CHECK-NEXT:          Member: {((4 * %offset) + %a),+,4}<%loop>
-; CHECK-NEXT:      Group [[GRP15]]:
 ; CHECK-NEXT:        (Low: %b High: (-4 + (8 * %n) + %b))
 ; CHECK-NEXT:          Member: {%b,+,8}<%loop>
+; CHECK-NEXT:      Group [[GRP15]]:
+; CHECK-NEXT:        (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
+; CHECK-NEXT:          Member: {((4 * %offset) + %a),+,4}<%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:    SCEV assumptions:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-may-wrap.ll b/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-may-wrap.ll
index b27937862b261..cce6f829d05af 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-may-wrap.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/runtime-checks-may-wrap.ll
@@ -11,20 +11,21 @@ define void @geps_may_wrap(ptr %a, ptr %b, i64 %N) {
 ; CHECK-NEXT:    Run-time memory checks:
 ; CHECK-NEXT:    Check 0:
 ; CHECK-NEXT:      Comparing group ([[GRP1:0x[0-9a-f]+]]):
-; CHECK-NEXT:        %gep.iv = getelementptr i32, ptr %a, i64 %iv
-; CHECK-NEXT:      Against group ([[GRP2:0x[0-9a-f]+]]):
 ; CHECK-NEXT:        ptr %b
+; CHECK-NEXT:      Against group ([[GRP2:0x[0-9a-f]+]]):
+; CHECK-NEXT:        %gep.iv = getelementptr i32, ptr %a, i64 %iv
 ; CHECK-NEXT:    Grouped accesses:
 ; CHECK-NEXT:      Group [[GRP1]]:
-; CHECK-NEXT:        (Low: %a High: (16 + (12 * (trunc i128 ((zext i64 %N to i128) /u 3) to i16)) + %a))
-; CHECK-NEXT:          Member: {%a,+,12}<%loop>
-; CHECK-NEXT:      Group [[GRP2]]:
 ; CHECK-NEXT:        (Low: %b High: (4 + %b))
 ; CHECK-NEXT:          Member: %b
+; CHECK-NEXT:      Group [[GRP2]]:
+; CHECK-NEXT:        (Low: %a High: (16 + (12 * (trunc i128 ((zext i64 %N to i128) /u 3) to i16)) + %a))
+; CHECK-NEXT:          Member: {%a,+,12}<%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:    SCEV assumptions:
 ; CHECK-NEXT:    {0,+,3}<%loop> Added Flags: <nusw>
+; CHECK-NEXT:    {%a,+,12}<%loop> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    Expressions re-written:
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 754b86ab2fb87..cf4fc143fe8c3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -1123,15 +1123,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
 ; DEFAULT-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; DEFAULT:       vector.scevcheck:
-; DEFAULT-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4
 ; DEFAULT-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
 ; DEFAULT-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
 ; DEFAULT-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
 ; DEFAULT-NEXT:    [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
-; DEFAULT-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
-; DEFAULT-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
+; DEFAULT-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]]
+; DEFAULT-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]]
 ; DEFAULT-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
-; DEFAULT-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 8
+; DEFAULT-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 4
 ; DEFAULT-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
 ; DEFAULT-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; DEFAULT-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
@@ -1139,12 +1138,13 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
 ; DEFAULT-NEXT:    [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]]
 ; DEFAULT-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]]
+; DEFAULT-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 8
 ; DEFAULT-NEXT:    [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
 ; DEFAULT-NEXT:    [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0
 ; DEFAULT-NEXT:    [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1
 ; DEFAULT-NEXT:    [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT6]]
-; DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT6]]
-; DEFAULT-NEXT:    [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[DST]]
+; DEFAULT-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 [[MUL_RESULT6]]
+; DEFAULT-NEXT:    [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP4]]
 ; DEFAULT-NEXT:    [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]]
 ; DEFAULT-NEXT:    [[TMP13:%.*]] = or i1 [[TMP4]], [[TMP8]]
 ; DEFAULT-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]]
@@ -1337,15 +1337,14 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
 ; PRED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; PRED:       vector.scevcheck:
-; PRED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 4
 ; PRED-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
 ; PRED-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
 ; PRED-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
 ; PRED-NEXT:    [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
-; PRED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
-; PRED-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
+; PRED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]]
+; PRED-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]]
 ; PRED-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
-; PRED-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 8
+; PRED-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 4
 ; PRED-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
 ; PRED-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; PRED-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
@@ -1353,12 +1352,13 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
 ; PRED-NEXT:    [[TMP7:%.*]] = icmp ult ptr [[TMP6]], [[SCEVGEP1]]
 ; PRED-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[MUL_OVERFLOW4]]
+; PRED-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 8
 ; PRED-NEXT:    [[MUL5:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[N]])
 ; PRED-NEXT:    [[MUL_RESULT6:%.*]] = extractvalue { i64, i1 } [[MUL5]], 0
 ; PRED-NEXT:    [[MUL_OVERFLOW7:%.*]] = extractvalue { i64, i1 } [[MUL5]], 1
 ; PRED-NEXT:    [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT6]]
-; PRED-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT6]]
-; PRED-NEXT:    [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[DST]]
+; PRED-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 [[MUL_RESULT6]]
+; PRED-NEXT:    [[TMP11:%.*]] = icmp ult ptr [[TMP10]], [[SCEVGEP4]]
 ; PRED-NEXT:    [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW7]]
 ; PRED-NEXT:    [[TMP13:%.*]] = or i1 [[TMP4]], [[TMP8]]
 ; PRED-NEXT:    [[TMP14:%.*]] = or i1 [[TMP13]], [[TMP12]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 4ea248254f2c6..f7b8758084056 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -500,18 +500,47 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; STRIDED-NEXT:  entry:
 ; STRIDED-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; STRIDED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; STRIDED-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 24, i64 [[TMP1]])
+; STRIDED-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 80, i64 [[TMP1]])
 ; STRIDED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
 ; STRIDED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; STRIDED:       vector.scevcheck:
+; STRIDED-NEXT:    [[TMP24:%.*]] = shl i64 [[STRIDE:%.*]], 2
+; STRIDED-NEXT:    [[TMP25:%.*]] = mul i64 [[STRIDE]], -4
+; STRIDED-NEXT:    [[TMP26:%.*]] = icmp slt i64 [[TMP24]], 0
+; STRIDED-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], i64 [[TMP25]], i64 [[TMP24]]
+; STRIDED-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP27]], i64 1023)
+; STRIDED-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
+; STRIDED-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; STRIDED-NEXT:    [[TMP28:%.*]] = sub i64 0, [[MUL_RESULT]]
+; STRIDED-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[MUL_RESULT]]
+; STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP28]]
+; STRIDED-NEXT:    [[TMP31:%.*]] = icmp ult ptr [[TMP29]], [[P2]]
+; STRIDED-NEXT:    [[TMP32:%.*]] = icmp ugt ptr [[TMP30]], [[P2]]
+; STRIDED-NEXT:    [[TMP33:%.*]] = select i1 [[TMP26]], i1 [[TMP32]], i1 [[TMP31]]
+; STRIDED-NEXT:    [[TMP13:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW]]
+; STRIDED-NEXT:    [[TMP34:%.*]] = icmp slt i64 [[TMP24]], 0
+; STRIDED-NEXT:    [[TMP15:%.*]] = select i1 [[TMP34]], i64 [[TMP25]], i64 [[TMP24]]
+; STRIDED-NEXT:    [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[TMP15]], i64 1023)
+; STRIDED-NEXT:    [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
+; STRIDED-NEXT:    [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
+; STRIDED-NEXT:    [[TMP16:%.*]] = sub i64 0, [[MUL_RESULT2]]
+; STRIDED-NEXT:    [[TMP35:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[MUL_RESULT2]]
+; STRIDED-NEXT:    [[TMP36:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP16]]
+; STRIDED-NEXT:    [[TMP37:%.*]] = icmp ult ptr [[TMP35]], [[P]]
+; STRIDED-NEXT:    [[TMP38:%.*]] = icmp ugt ptr [[TMP36]], [[P]]
+; STRIDED-NEXT:    [[TMP39:%.*]] = select i1 [[TMP34]], i1 [[TMP38]], i1 [[TMP37]]
+; STRIDED-NEXT:    [[TMP40:%.*]] = or i1 [[TMP39]], [[MUL_OVERFLOW3]]
+; STRIDED-NEXT:    [[TMP23:%.*]] = or i1 [[TMP13]], [[TMP40]]
+; STRIDED-NEXT:    br i1 [[TMP23]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK1:%.*]]
 ; STRIDED:       vector.memcheck:
-; STRIDED-NEXT:    [[TMP3:%.*]] = mul i64 [[STRIDE:%.*]], 4092
-; STRIDED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[TMP3]]
+; STRIDED-NEXT:    [[TMP3:%.*]] = mul i64 [[STRIDE]], 4092
+; STRIDED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP3]]
 ; STRIDED-NEXT:    [[TMP4:%.*]] = icmp ult ptr [[P2]], [[SCEVGEP]]
 ; STRIDED-NEXT:    [[UMIN:%.*]] = select i1 [[TMP4]], ptr [[P2]], ptr [[SCEVGEP]]
 ; STRIDED-NEXT:    [[TMP5:%.*]] = icmp ugt ptr [[P2]], [[SCEVGEP]]
 ; STRIDED-NEXT:    [[UMAX:%.*]] = select i1 [[TMP5]], ptr [[P2]], ptr [[SCEVGEP]]
 ; STRIDED-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[UMAX]], i64 4
-; STRIDED-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]]
+; STRIDED-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP3]]
 ; STRIDED-NEXT:    [[TMP6:%.*]] = icmp ult ptr [[P]], [[SCEVGEP2]]
 ; STRIDED-NEXT:    [[UMIN3:%.*]] = select i1 [[TMP6]], ptr [[P]], ptr [[SCEVGEP2]]
 ; STRIDED-NEXT:    [[TMP7:%.*]] = icmp ugt ptr [[P]], [[SCEVGEP2]]
@@ -554,7 +583,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; STRIDED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; STRIDED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; STRIDED:       scalar.ph:
-; STRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; STRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_MEMCHECK1]] ]
 ; STRIDED-NEXT:    br label [[LOOP:%.*]]
 ; STRIDED:       loop:
 ; STRIDED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
index cf66264486095..b885d85a96800 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
@@ -114,8 +114,18 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
 ; CHECK-SAME: ptr [[ARG:%.*]], i64 [[ARG1:%.*]], ptr [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[ARG1]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 18
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK:       [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG]], i64 16
+; CHECK-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[ARG1]])
+; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
+; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 0, [[MUL_RESULT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[SCEVGEP]]
+; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
 ; CHECK:       [[VECTOR_MEMCHECK]]:
 ; CHECK-NEXT:    [[TMP20:%.*]] = shl i64 [[ARG1]], 4
 ; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[TMP20]], 16
@@ -167,7 +177,7 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -266,19 +276,26 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[N]], 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 52
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 64
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
 ; CHECK:       [[VECTOR_SCEVCHECK]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[N]], 3
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 24
 ; CHECK-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 0, [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT]]
+; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT]]
+; CHECK-NEXT:    [[TMP41:%.*]] = icmp ult ptr [[TMP32]], [[A]]
+; CHECK-NEXT:    [[TMP44:%.*]] = or i1 [[TMP41]], [[MUL_OVERFLOW]]
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT:    [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
+; CHECK-NEXT:    [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
+; CHECK-NEXT:    [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
+; CHECK-NEXT:    [[TMP55:%.*]] = sub i64 0, [[MUL_RESULT2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult ptr [[TMP4]], [[SCEVGEP]]
-; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW]]
-; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 28
+; CHECK-NEXT:    [[TMP57:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW3]]
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 8
 ; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
@@ -286,7 +303,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 [[MUL_RESULT3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp ult ptr [[TMP8]], [[SCEVGEP1]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 20
+; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 12
 ; CHECK-NEXT:    [[MUL6:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT7:%.*]] = extractvalue { i64, i1 } [[MUL6]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW8:%.*]] = extractvalue { i64, i1 } [[MUL6]], 1
@@ -302,7 +319,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[SCEVGEP9]], i64 [[MUL_RESULT11]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp ult ptr [[TMP16]], [[SCEVGEP9]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW12]]
-; CHECK-NEXT:    [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 12
+; CHECK-NEXT:    [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[A]], i64 20
 ; CHECK-NEXT:    [[MUL14:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT15:%.*]] = extractvalue { i64, i1 } [[MUL14]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW16:%.*]] = extractvalue { i64, i1 } [[MUL14]], 1
@@ -310,7 +327,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[SCEVGEP13]], i64 [[MUL_RESULT15]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp ult ptr [[TMP20]], [[SCEVGEP13]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW16]]
-; CHECK-NEXT:    [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 8
+; CHECK-NEXT:    [[SCEVGEP17:%.*]] = getelementptr i8, ptr [[A]], i64 24
 ; CHECK-NEXT:    [[MUL18:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT19:%.*]] = extractvalue { i64, i1 } [[MUL18]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW20:%.*]] = extractvalue { i64, i1 } [[MUL18]], 1
@@ -318,7 +335,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[SCEVGEP17]], i64 [[MUL_RESULT19]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp ult ptr [[TMP24]], [[SCEVGEP17]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = or i1 [[TMP25]], [[MUL_OVERFLOW20]]
-; CHECK-NEXT:    [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT:    [[SCEVGEP21:%.*]] = getelementptr i8, ptr [[A]], i64 28
 ; CHECK-NEXT:    [[MUL22:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT23:%.*]] = extractvalue { i64, i1 } [[MUL22]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW24:%.*]] = extractvalue { i64, i1 } [[MUL22]], 1
@@ -326,37 +343,47 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[SCEVGEP21]], i64 [[MUL_RESULT23]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = icmp ult ptr [[TMP28]], [[SCEVGEP21]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = or i1 [[TMP29]], [[MUL_OVERFLOW24]]
+; CHECK-NEXT:    [[SCEVGEP31:%.*]] = getelementptr i8, ptr [[B]], i64 4
+; CHECK-NEXT:    [[MUL29:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]])
+; CHECK-NEXT:    [[MUL_RESULT30:%.*]] = extractvalue { i64, i1 } [[MUL29]], 0
+; CHECK-NEXT:    [[MUL_OVERFLOW31:%.*]] = extractvalue { i64, i1 } [[MUL29]], 1
+; CHECK-NEXT:    [[TMP67:%.*]] = sub i64 0, [[MUL_RESULT30]]
+; CHECK-NEXT:    [[TMP68:%.*]] = getelementptr i8, ptr [[SCEVGEP31]], i64 [[MUL_RESULT30]]
+; CHECK-NEXT:    [[TMP69:%.*]] = icmp ult ptr [[TMP68]], [[SCEVGEP31]]
+; CHECK-NEXT:    [[TMP70:%.*]] = or i1 [[TMP69]], [[MUL_OVERFLOW31]]
 ; CHECK-NEXT:    [[MUL25:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 32, i64 [[TMP2]])
 ; CHECK-NEXT:    [[MUL_RESULT26:%.*]] = extractvalue { i64, i1 } [[MUL25]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW27:%.*]] = extractvalue { i64, i1 } [[MUL25]], 1
 ; CHECK-NEXT:    [[TMP31:%.*]] = sub i64 0, [[MUL_RESULT26]]
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT26]]
-; CHECK-NEXT:    [[TMP33:%.*]] = icmp ult ptr [[TMP32]], [[A]]
+; CHECK-NEXT:    [[TMP71:%.*]] = getelementptr i8, ptr [[B]], i64 [[MUL_RESULT26]]
+; CHECK-NEXT:    [[TMP33:%.*]] = icmp ult ptr [[TMP71]], [[B]]
 ; CHECK-NEXT:    [[TMP34:%.*]] = or i1 [[TMP33]], [[MUL_OVERFLOW27]]
+; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP44]], [[TMP57]]
 ; CHECK-NEXT:    [[TMP35:%.*]] = or i1 [[TMP6]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = or i1 [[TMP36]], [[TMP18]]
 ; CHECK-NEXT:    [[TMP38:%.*]] = or i1 [[TMP37]], [[TMP22]]
 ; CHECK-NEXT:    [[TMP39:%.*]] = or i1 [[TMP38]], [[TMP26]]
 ; CHECK-NEXT:    [[TMP40:%.*]] = or i1 [[TMP39]], [[TMP30]]
-; CHECK-NEXT:    [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP34]]
-; CHECK-NEXT:    br i1 [[TMP41]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK-NEXT:    [[TMP72:%.*]] = or i1 [[TMP40]], [[TMP70]]
+; CHECK-NEXT:    [[TMP73:%.*]] = or i1 [[TMP72]], [[TMP34]]
+; CHECK-NEXT:    br i1 [[TMP73]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
 ; CHECK:       [[VECTOR_MEMCHECK]]:
 ; CHECK-NEXT:    [[TMP42:%.*]] = lshr i64 [[N]], 3
 ; CHECK-NEXT:    [[TMP43:%.*]] = shl i64 [[TMP42]], 5
-; CHECK-NEXT:    [[TMP44:%.*]] = add i64 [[TMP43]], 32
-; CHECK-NEXT:    [[SCEVGEP28:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP44]]
 ; CHECK-NEXT:    [[TMP45:%.*]] = add nuw nsw i64 [[TMP43]], 4
 ; CHECK-NEXT:    [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP45]]
+; CHECK-NEXT:    [[TMP53:%.*]] = add i64 [[TMP43]], 32
+; CHECK-NEXT:    [[SCEVGEP28:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP53]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = shl i64 [[TMP42]], 4
 ; CHECK-NEXT:    [[TMP47:%.*]] = add nuw nsw i64 [[TMP46]], 8
 ; CHECK-NEXT:    [[SCEVGEP30:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP47]]
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP29]]
 ; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]]
-; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT:    [[BOUND031:%.*]] = icmp ult ptr [[A]], [[SCEVGEP30]]
+; CHECK-NEXT:    [[BOUND2:%.*]] = icmp ult ptr [[A]], [[SCEVGEP29]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND2]]
 ; CHECK-NEXT:    [[BOUND132:%.*]] = icmp ult ptr [[B]], [[SCEVGEP28]]
-; CHECK-NEXT:    [[FOUND_CONFLICT33:%.*]] = and i1 [[BOUND031]], [[BOUND132]]
+; CHECK-NEXT:    [[BOUND133:%.*]] = icmp ult ptr [[A]], [[SCEVGEP30]]
+; CHECK-NEXT:    [[FOUND_CONFLICT33:%.*]] = and i1 [[BOUND132]], [[BOUND133]]
 ; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT33]]
 ; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
@@ -378,7 +405,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[STRIDED_VEC34:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32>
 ; CHECK-NEXT:    [[TMP56:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP50]]
 ; CHECK-NEXT:    [[TMP54:%.*]] = getelementptr i32, ptr [[B]], <4 x i64> [[VEC_IND]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META6:![0-9]+]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP54]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison), !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]]
 ; CHECK-NEXT:    [[TMP58:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> zeroinitializer, <8 x i32>
 ; CHECK-NEXT:    [[TMP59:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC34]], <4 x i32> zeroinitializer, <8 x i32>
 ; CHECK-NEXT:    [[TMP60:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_GATHER]], <4 x i32> zeroinitializer, <8 x i32>
@@ -390,7 +417,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
@@ -431,7 +458,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    store i32 0, ptr [[GEP_A_7]], align 4
 ; CHECK-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -498,7 +525,7 @@ define void @interleave_store_double_i64(ptr %dst) {
 ; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
@@ -512,7 +539,7 @@ define void @interleave_store_double_i64(ptr %dst) {
 ; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_0]], align 8
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -620,7 +647,7 @@ define void @interleave_store_i64_double_2(ptr %dst) {
 ; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
@@ -634,7 +661,7 @@ define void @interleave_store_i64_double_2(ptr %dst) {
 ; CHECK-NEXT:    store double 0.000000e+00, ptr [[GEP_1]], align 8
 ; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
-; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -670,10 +697,12 @@ attributes #1 = { "min-legal-vector-width"="0" "target-cpu"="cascadelake" }
 ; CHECK: [[META6]] = !{[[META7:![0-9]+]]}
 ; CHECK: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
 ; CHECK: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
-; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
-; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
+; CHECK: [[META9]] = !{[[META10:![0-9]+]]}
+; CHECK: [[META10]] = distinct !{[[META10]], [[META8]]}
 ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
-; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]]}
 ; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
 ; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]}
+; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]], [[META2]]}
+; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META1]]}
 ;.