Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions llvm/lib/Transforms/Scalar/LoopFuse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ STATISTIC(OnlySecondCandidateIsGuarded,
"The second candidate is guarded while the first one is not");
// Pass statistics: one counter per kind of preheader instruction motion, plus
// a counter for fusions admitted by the dependence-direction check.
STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
// The description previously repeated the "hoisted" wording of the counter
// above, which made the two counters indistinguishable in -stats output.
STATISTIC(NumSunkInsts, "Number of sunk preheader instructions.");
STATISTIC(NumDepSafeFused, "Number of fused loops with dependencies "
"proven safe based on the dependence direction");

enum FusionDependenceAnalysisChoice {
FUSION_DEPENDENCE_ANALYSIS_SCEV,
Expand Down Expand Up @@ -1349,6 +1351,33 @@ struct LoopFuser {
<< "\n");
}
#endif
// Decide from the direction vector of DepResult whether the dependence still
// allows the two candidates to be fused. Returns true (and bumps
// NumDepSafeFused) as soon as one of the two "safe" cases below is recognised;
// otherwise control falls through to the checks that follow.
//
// Levels / SeparateLevels are read straight from the dependence result and
// CurLoopLevel is the nesting depth of the first candidate's loop.
// NOTE(review): presumably Levels counts the loops common to both accesses and
// SeparateLevels the levels tracked for loops that are not common -- confirm
// against DependenceAnalysis.h.
unsigned Levels = DepResult->getLevels();
unsigned SeparateLevels = DepResult->getSeparateLevels();
unsigned CurLoopLevel = FC0.L->getLoopDepth();

// Scan the levels that enclose the candidates (1 .. CurLoopLevel - 1, capped
// at Levels). OuterEqDir stays true only if every one of them admits the EQ
// direction.
// NOTE(review): CurLoopLevel - 1 is unsigned arithmetic and would wrap if
// getLoopDepth() could return 0 -- confirm that cannot happen here.
bool OuterEqDir = true;
for (unsigned II = 1; II <= std::min(CurLoopLevel - 1, Levels); ++II) {
// The loop bound guarantees II <= Levels, so the second argument
// (II > Levels) is always false inside this loop.
unsigned Direction = DepResult->getDirection(II, II > Levels);
if (!(Direction & Dependence::DVEntry::EQ)) {
// The direction at this enclosing level excludes EQ, i.e. the two accesses
// cannot refer to the same iteration of that outer loop.
OuterEqDir = false;
break;
}
}
// Safe case 1: some enclosing level excludes EQ, or the candidates sit deeper
// than every level described by the dependence result.
if (!OuterEqDir || CurLoopLevel > Levels + SeparateLevels) {
LLVM_DEBUG(dbgs() << "Safe to fuse with no dependency\n");
NumDepSafeFused++;
return true;
}

// Past this point the candidates' own level must lie beyond the common
// levels; the direction for it is queried with the second argument set to
// true.
// NOTE(review): presumably `true` selects the separate-level entry of the
// direction vector -- confirm against Dependence::getDirection.
assert(CurLoopLevel > Levels && "Fusion candidates are not separated");
unsigned CurDir = DepResult->getDirection(CurLoopLevel, true);
// Safe case 2: the direction at the candidates' level has no GT component.
if (!(CurDir & Dependence::DVEntry::GT)) {
LLVM_DEBUG(dbgs() << "Safe to fuse with backward loop-carried "
"dependency\n");
NumDepSafeFused++;
return true;
}

if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
LLVM_DEBUG(
Expand Down
185 changes: 185 additions & 0 deletions llvm/test/Transforms/LoopFusion/backward_loop_carried.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
; RUN: opt -S -passes=loop-fusion -da-disable-delinearization-checks < %s | FileCheck %s

; The two inner loops carry no dependence that prevents fusion: the store and
; the load use different subscripts at the outer loop levels (i vs. i + 3 and
; j vs. j + 2), so the inner loops are allowed to be fused.

; C Code
;
;; for (long int i = 0; i < n; i++) {
;;   for (long int j = 0; j < n; j++) {
;;     for (long int k = 0; k < n; k++) {
;;       A[i][j][k] = i;
;;     }
;;     for (long int k = 0; k < n; k++) {
;;       temp = A[i + 3][j + 2][k + 1];
;;     }
;;   }
;; }

define void @backward_dep0(i64 %n, ptr %A) nounwind uwtable ssp {
entry:
  %cmp10 = icmp sgt i64 %n, 0
  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26

; The label is anchored on the full function name so it cannot also match
; @backward_dep1. Exactly one "for.body<N>:" block is expected in this
; function's output; a second one means the two k-loops were not fused.
; CHECK-LABEL: @backward_dep0(
; CHECK-COUNT-1: for.body{{[0-9]+}}:
; CHECK-NOT: for.body{{[0-9]+}}:

for.cond1.preheader.preheader:  ; preds = %entry
  br label %for.cond1.preheader

; Outer i-loop header.
for.cond1.preheader:  ; preds = %for.cond1.preheader.preheader, %for.inc24
  %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
  %cmp26 = icmp sgt i64 %n, 0
  br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24

for.cond4.preheader.preheader:  ; preds = %for.cond1.preheader
  br label %for.cond4.preheader

; Middle j-loop header; also the guard of the first k-loop.
for.cond4.preheader:  ; preds = %for.cond4.preheader.preheader, %for.inc21
  %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
  %cmp51 = icmp sgt i64 %n, 0
  br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit

for.body6.preheader:  ; preds = %for.cond4.preheader
  br label %for.body6

; First k-loop: A[i][j][k] = i.
for.body6:  ; preds = %for.body6.preheader, %for.body6
  %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
  %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
  store i64 %i.011, ptr %arrayidx8, align 8
  %inc = add nsw i64 %k.02, 1
  %exitcond13 = icmp ne i64 %inc, %n
  br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit

for.cond10.loopexit.loopexit:  ; preds = %for.body6
  br label %for.cond10.loopexit

; Guard of the second k-loop.
for.cond10.loopexit:  ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
  %cmp113 = icmp sgt i64 %n, 0
  br i1 %cmp113, label %for.body12.preheader, label %for.inc21

for.body12.preheader:  ; preds = %for.cond10.loopexit
  br label %for.body12

; Second k-loop: loads A[i + 3][j + 2][k + 1].
for.body12:  ; preds = %for.body12.preheader, %for.body12
  %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
  %add = add nsw i64 %k9.05, 1
  %add13 = add nsw i64 %j.07, 2
  %add14 = add nsw i64 %i.011, 3
  %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %add14, i64 %add13, i64 %add
  %0 = load i64, ptr %arrayidx17, align 8
  %inc19 = add nsw i64 %k9.05, 1
  %exitcond = icmp ne i64 %inc19, %n
  br i1 %exitcond, label %for.body12, label %for.inc21.loopexit

for.inc21.loopexit:  ; preds = %for.body12
  br label %for.inc21

; j-loop latch.
for.inc21:  ; preds = %for.inc21.loopexit, %for.cond10.loopexit
  %inc22 = add nsw i64 %j.07, 1
  %exitcond14 = icmp ne i64 %inc22, %n
  br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit

for.inc24.loopexit:  ; preds = %for.inc21
  br label %for.inc24

; i-loop latch.
for.inc24:  ; preds = %for.inc24.loopexit, %for.cond1.preheader
  %inc25 = add nsw i64 %i.011, 1
  %exitcond15 = icmp ne i64 %inc25, %n
  br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit

for.end26.loopexit:  ; preds = %for.inc24
  br label %for.end26

for.end26:  ; preds = %for.end26.loopexit, %entry
  ret void
}

; The two inner loops have a backward loop-carried dependency, allowing them
; to be fused.

; C Code
;
;; for (long int i = 0; i < n; i++) {
;;   for (long int j = 0; j < n; j++) {
;;     for (long int k = 0; k < n; k++) {
;;       A[i][j][k] = i;
;;     }
;;     for (long int k = 0; k < n; k++) {
;;       temp = A[i][j][k - 1];
;;     }
;;   }
;; }

define void @backward_dep1(i64 %n, ptr %A) nounwind uwtable ssp {
entry:
  %cmp10 = icmp sgt i64 %n, 0
  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26

; The label is anchored on the full function name so it cannot also match
; @backward_dep0. Exactly one "for.body<N>:" block is expected in this
; function's output; a second one means the two k-loops were not fused.
; CHECK-LABEL: @backward_dep1(
; CHECK-COUNT-1: for.body{{[0-9]+}}:
; CHECK-NOT: for.body{{[0-9]+}}:

for.cond1.preheader.preheader:  ; preds = %entry
  br label %for.cond1.preheader

; Outer i-loop header.
for.cond1.preheader:  ; preds = %for.cond1.preheader.preheader, %for.inc24
  %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
  %cmp26 = icmp sgt i64 %n, 0
  br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24

for.cond4.preheader.preheader:  ; preds = %for.cond1.preheader
  br label %for.cond4.preheader

; Middle j-loop header; also the guard of the first k-loop.
for.cond4.preheader:  ; preds = %for.cond4.preheader.preheader, %for.inc21
  %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
  %cmp51 = icmp sgt i64 %n, 0
  br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit

for.body6.preheader:  ; preds = %for.cond4.preheader
  br label %for.body6

; First k-loop: A[i][j][k] = i.
for.body6:  ; preds = %for.body6.preheader, %for.body6
  %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
  %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
  store i64 %i.011, ptr %arrayidx8, align 8
  %inc = add nsw i64 %k.02, 1
  %exitcond13 = icmp ne i64 %inc, %n
  br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit

for.cond10.loopexit.loopexit:  ; preds = %for.body6
  br label %for.cond10.loopexit

; Guard of the second k-loop.
for.cond10.loopexit:  ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
  %cmp113 = icmp sgt i64 %n, 0
  br i1 %cmp113, label %for.body12.preheader, label %for.inc21

for.body12.preheader:  ; preds = %for.cond10.loopexit
  br label %for.body12

; Second k-loop: loads A[i][j][k - 1], the element the first loop stored one
; k-iteration earlier.
for.body12:  ; preds = %for.body12.preheader, %for.body12
  %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
  %add = add nsw i64 %k9.05, -1
  %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %add
  %0 = load i64, ptr %arrayidx17, align 8
  %inc19 = add nsw i64 %k9.05, 1
  %exitcond = icmp ne i64 %inc19, %n
  br i1 %exitcond, label %for.body12, label %for.inc21.loopexit

for.inc21.loopexit:  ; preds = %for.body12
  br label %for.inc21

; j-loop latch.
for.inc21:  ; preds = %for.inc21.loopexit, %for.cond10.loopexit
  %inc22 = add nsw i64 %j.07, 1
  %exitcond14 = icmp ne i64 %inc22, %n
  br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit

for.inc24.loopexit:  ; preds = %for.inc21
  br label %for.inc24

; i-loop latch.
for.inc24:  ; preds = %for.inc24.loopexit, %for.cond1.preheader
  %inc25 = add nsw i64 %i.011, 1
  %exitcond15 = icmp ne i64 %inc25, %n
  br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit

for.end26.loopexit:  ; preds = %for.inc24
  br label %for.end26

for.end26:  ; preds = %for.end26.loopexit, %entry
  ret void
}
28 changes: 12 additions & 16 deletions llvm/test/Transforms/LoopFusion/simple.ll
Original file line number Diff line number Diff line change
Expand Up @@ -300,40 +300,36 @@ bb23: ; preds = %bb17, %bb

define void @forward_dep(ptr noalias %arg) {
; CHECK-LABEL: @forward_dep(
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[BB7:%.*]]
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: br label %[[BB7:.*]]
; CHECK: bb7:
; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB14:%.*]] ]
; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, [[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[BB14]] ]
; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP15:%.*]], %[[BB25:.*]] ]
; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], %[[BB25]] ]
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BB25]] ], [ 0, %[[BB]] ]
; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[DOT013]], -3
; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 3
; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
; CHECK-NEXT: [[TMP10:%.*]] = mul nsw i32 [[TMP]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[INDVARS_IV22]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = srem i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[INDVARS_IV22]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV22]]
; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP13]], align 4
; CHECK-NEXT: br label [[BB14]]
; CHECK-NEXT: br label %[[BB14:.*]]
; CHECK: bb14:
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
; CHECK-NEXT: br i1 [[EXITCOND4]], label [[BB7]], label [[BB19_PREHEADER:%.*]]
; CHECK: bb19.preheader:
; CHECK-NEXT: br label [[BB19:%.*]]
; CHECK: bb19:
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB25:%.*]] ], [ 0, [[BB19_PREHEADER]] ]
; CHECK-NEXT: [[TMP20:%.*]] = add nsw i64 [[INDVARS_IV1]], -3
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
; CHECK-NEXT: [[TMP23:%.*]] = mul nsw i32 [[TMP22]], 3
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV1]]
; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4
; CHECK-NEXT: br label [[BB25]]
; CHECK-NEXT: br label %[[BB25]]
; CHECK: bb25:
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100
; CHECK-NEXT: br i1 [[EXITCOND]], label [[BB19]], label [[BB26:%.*]]
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[BB7]], label %[[BB26:.*]]
; CHECK: bb26:
; CHECK-NEXT: ret void
;
Expand Down