Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 101 additions & 17 deletions llvm/test/Transforms/LoopUnrollAndJam/dependencies_multidims.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
; RUN: opt -da-disable-delinearization-checks -passes=loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s
; RUN: opt -da-disable-delinearization-checks -aa-pipeline=basic-aa -passes='loop-unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

; XFAIL: *
; The transformation seems to have succeeded "accidentally". It should be fixed
; by PR #156578.

; CHECK-LABEL: sub_sub_less
; CHECK: %j = phi
; CHECK-NOT: %j.1 = phi
;
; sub_sub_less should NOT be unroll-and-jammed due to a loop-carried dependency.
; Memory accesses:
; - A[i][j] = 1 (write to current iteration)
; - A[i+1][j-1] = add (write to next i iteration, previous j iteration)
; The dependency: A[i+1][j-1] from iteration (i,j) may conflict with A[i'][j']
; from a later iteration when i'=i+1 and j'=j-1, creating a backward dependency
; in the j dimension that prevents safe unroll-and-jam.
define void @sub_sub_less(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
entry:
%cmp = icmp sgt i32 %N, 0
Expand All @@ -20,7 +22,7 @@ for.outer:
br label %for.inner

for.inner:
%j = phi i32 [ %add6, %for.inner ], [ 0, %for.outer ]
%j = phi i32 [ %add6, %for.inner ], [ 1, %for.outer ]
%sum = phi i32 [ %add, %for.inner ], [ 0, %for.outer ]
%arrayidx5 = getelementptr inbounds i32, ptr %B, i32 %j
%0 = load i32, ptr %arrayidx5, align 4
Expand Down Expand Up @@ -51,6 +53,31 @@ cleanup:
; CHECK: %j.1 = phi
; CHECK: %j.2 = phi
; CHECK: %j.3 = phi
;
; sub_sub_eq SHOULD be unroll-and-jammed (count=4) as it's safe.
; Memory accesses:
; - A[i][j] = 1 (write to current iteration)
; - A[i+1][j] = add (write to next i iteration, same j iteration)
; No dependency conflict: When unroll-and-jamming with count=4, the i loop
; iterations (i, i+1, i+2, i+3) are unrolled and their j loops are jammed
; together. Unroll-and-jam factor 4:
;
; for (int i = 0; i < N; i += 4)
; for (int j = 0; j < N; ++j) {
; // i iteration
; A[i][j] = 1; A[i+1][j] = sum_i;
; // i+1 iteration
; A[i+1][j] = 1; A[i+2][j] = sum_i1;
; // i+2 iteration
; A[i+2][j] = 1; A[i+3][j] = sum_i2;
; // i+3 iteration
; A[i+3][j] = 1; A[i+4][j] = sum_i3;
; }
;
; A[i+1][j] written by iteration i is the same element that iteration i+1
; overwrites with 1 (and likewise for rows i+2 and i+3). Both stores use the
; same j, so after jamming they end up in the same j iteration with the store
; from iteration i still ahead of the store from iteration i+1, exactly as in
; the original loop nest. The order of the conflicting writes is preserved.
define void @sub_sub_eq(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
entry:
%cmp = icmp sgt i32 %N, 0
Expand Down Expand Up @@ -92,6 +119,29 @@ cleanup:
; CHECK: %j.1 = phi
; CHECK: %j.2 = phi
; CHECK: %j.3 = phi
;
; sub_sub_more SHOULD be unroll-and-jammed (count=4) as it's safe.
; Memory accesses:
; - A[i][j] = 1 (write to current iteration)
; - A[i+1][j+1] = add (write to next i iteration, next j iteration)
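; No dependency conflict: the element A[i+1][j+1] written in iteration (i, j)
; is written again in iteration (i+1, j+1), i.e. later in both i and j, so the
; dependency is forward in both dimensions and is safe. Unroll-and-jam factor 4: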
;
; for (int i = 0; i < N; i += 4)
; for (int j = 0; j < N; ++j) {
; // i iteration
; A[i][j] = 1; A[i+1][j+1] = sum_i;
; // i+1 iteration
; A[i+1][j] = 1; A[i+2][j+1] = sum_i1;
; // i+2 iteration
; A[i+2][j] = 1; A[i+3][j+1] = sum_i2;
; // i+3 iteration
; A[i+3][j] = 1; A[i+4][j+1] = sum_i3;
; }
;
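; A[i+1][j+1] from iteration i touches row i+1 at column j+1, while the unrolled
; iteration i+1 touches row i+1 at column j in the same jammed j iteration, so
; the two accesses are disjoint there. The conflicting write A[i+1][j+1] = 1
; only happens in the following j iteration, so the forward dependency pattern
; doesn't create conflicts between unrolled i iterations.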
define void @sub_sub_more(ptr noalias nocapture %A, i32 %N, ptr noalias nocapture readonly %B) {
entry:
%cmp = icmp sgt i32 %N, 0
Expand Down Expand Up @@ -130,12 +180,21 @@ cleanup:
; CHECK-LABEL: sub_sub_less_3d
; CHECK: %k = phi
; CHECK-NOT: %k.1 = phi

;
; sub_sub_less_3d should NOT be unroll-and-jammed due to a loop-carried dependency.
; Memory accesses:
; - A3d[i][j][k] = 5 (write to current iteration)
; - A3d[i+1][j][k-1] = 10 (write to next i iteration, previous k iteration)
; The dependency: A[i+1][j][k-1] from iteration (i,j,k) may conflict with
; A[i'][j'][k'] from a later iteration when i'=i+1 and k'=k-1, creating a
; backward dependency in the k dimension that prevents safe unroll-and-jam.
; This is a 3D version of the same pattern as sub_sub_less.
;
; for (long i = 0; i < 100; ++i)
; for (long j = 0; j < 100; ++j)
; for (long k = 0; k < 100; ++k) {
; A[i][j][k] = 0;
; A[i+1][j][k-1] = 0;
; for (long k = 1; k < 100; ++k) {
; A[i][j][k] = 5;
; A[i+1][j][k-1] = 10;
; }

define void @sub_sub_less_3d(ptr noalias %A) {
Expand All @@ -151,13 +210,13 @@ for.j:
br label %for.k

for.k:
%k = phi i32 [ 0, %for.j ], [ %inc.k, %for.k ]
%k = phi i32 [ 1, %for.j ], [ %inc.k, %for.k ]
%arrayidx = getelementptr inbounds [100 x [100 x i32]], ptr %A, i32 %i, i32 %j, i32 %k
store i32 0, ptr %arrayidx, align 4
store i32 5, ptr %arrayidx, align 4
%add.i = add nsw i32 %i, 1
%sub.k = add nsw i32 %k, -1
%arrayidx2 = getelementptr inbounds [100 x [100 x i32]], ptr %A, i32 %add.i, i32 %j, i32 %sub.k
store i32 0, ptr %arrayidx2, align 4
store i32 10, ptr %arrayidx2, align 4
%inc.k = add nsw i32 %k, 1
%cmp.k = icmp slt i32 %inc.k, 100
br i1 %cmp.k, label %for.k, label %for.j.latch
Expand All @@ -178,8 +237,33 @@ for.end:

; CHECK-LABEL: sub_sub_outer_scalar
; CHECK: %k = phi
; CHECK-NOT: %k.1 = phi

; CHECK: %k.1 = phi
; CHECK: %k.2 = phi
; CHECK: %k.3 = phi
;
; sub_sub_outer_scalar SHOULD be unroll-and-jammed (count=4) as it's safe.
; Memory accesses:
; - load from A[j][k] (read from current j iteration)
; - store to A[j-1][k] (write to previous j iteration)
; The dependency: reading A[j][k] and writing A[j-1][k] creates a backward
; dependency, but execution order is preserved. Unroll-and-jam factor 4:
;
; for (int i = 0; i < 100; i++)
; for (int j = 1; j < 100; j += 4)
; for (int k = 0; k < 100; k++) {
; // j iteration
; temp0 = A[j][k]; A[j-1][k] = temp0;
; // j+1 iteration
; temp1 = A[j+1][k]; A[j][k] = temp1;
; // j+2 iteration
; temp2 = A[j+2][k]; A[j+1][k] = temp2;
; // j+3 iteration
; temp3 = A[j+3][k]; A[j+2][k] = temp3;
; }
;
; Iteration j loads A[j][k] and iteration j+1 later stores to A[j][k]. After
; jamming, both accesses sit in the same k iteration and the load from
; iteration j still precedes the store from iteration j+1, so the original
; order of the conflicting accesses is preserved.
define void @sub_sub_outer_scalar(ptr %A) {
entry:
br label %for.i
Expand All @@ -189,7 +273,7 @@ for.i:
br label %for.j

for.j:
%j = phi i64 [ 0, %for.i ], [ %inc.j, %for.j.latch ]
%j = phi i64 [ 1, %for.i ], [ %inc.j, %for.j.latch ]
br label %for.k

for.k:
Expand Down