Skip to content

Commit befd167

Browse files
committed
[mlir][gpu] Fix cuda integration tests
https://reviews.llvm.org/D138758 added the `uniform` flag to the GPU reduce ops; this commit updates the CUDA integration tests to use it. Differential Revision: https://reviews.llvm.org/D140014
1 parent e45cf47 commit befd167

File tree

8 files changed

+9
-9
lines changed

8 files changed

+9
-9
lines changed

mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func.func @main() {
5555
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1)
5656
threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
5757
%val = memref.load %data[%bx, %tx] : memref<2x6xi32>
58-
%reduced = gpu.all_reduce and %val {} : (i32) -> (i32)
58+
%reduced = gpu.all_reduce and %val uniform {} : (i32) -> (i32)
5959
memref.store %reduced, %sum[%bx] : memref<2xi32>
6060
gpu.terminator
6161
}

mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func.func @main() {
5555
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1)
5656
threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
5757
%val = memref.load %data[%bx, %tx] : memref<2x6xi32>
58-
%reduced = gpu.all_reduce max %val {} : (i32) -> (i32)
58+
%reduced = gpu.all_reduce max %val uniform {} : (i32) -> (i32)
5959
memref.store %reduced, %sum[%bx] : memref<2xi32>
6060
gpu.terminator
6161
}

mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func.func @main() {
5555
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1)
5656
threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
5757
%val = memref.load %data[%bx, %tx] : memref<2x6xi32>
58-
%reduced = gpu.all_reduce min %val {} : (i32) -> (i32)
58+
%reduced = gpu.all_reduce min %val uniform {} : (i32) -> (i32)
5959
memref.store %reduced, %sum[%bx] : memref<2xi32>
6060
gpu.terminator
6161
}

mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ func.func @main() {
2828
%idx = arith.addi %tx, %t2 : index
2929
%t3 = arith.index_cast %idx : index to i32
3030
%val = arith.sitofp %t3 : i32 to f32
31-
%sum = gpu.all_reduce add %val {} : (f32) -> (f32)
31+
%sum = gpu.all_reduce add %val uniform {} : (f32) -> (f32)
3232
memref.store %sum, %dst[%tz, %ty, %tx] : memref<?x?x?xf32>
3333
gpu.terminator
3434
}

mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func.func @main() {
5555
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1)
5656
threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
5757
%val = memref.load %data[%bx, %tx] : memref<2x6xi32>
58-
%reduced = gpu.all_reduce or %val {} : (i32) -> (i32)
58+
%reduced = gpu.all_reduce or %val uniform {} : (i32) -> (i32)
5959
memref.store %reduced, %sum[%bx] : memref<2xi32>
6060
gpu.terminator
6161
}

mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ func.func @main() {
2020
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
2121
threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
2222
%val = arith.index_cast %tx : index to i32
23-
%xor = gpu.all_reduce %val {
23+
%xor = gpu.all_reduce %val uniform {
2424
^bb(%lhs : i32, %rhs : i32):
2525
%xor = arith.xori %lhs, %rhs : i32
2626
"gpu.yield"(%xor) : (i32) -> ()

mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func.func @main() {
5555
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1)
5656
threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
5757
%val = memref.load %data[%bx, %tx] : memref<2x6xi32>
58-
%reduced = gpu.all_reduce xor %val {} : (i32) -> (i32)
58+
%reduced = gpu.all_reduce xor %val uniform {} : (i32) -> (i32)
5959
memref.store %reduced, %sum[%bx] : memref<2xi32>
6060
gpu.terminator
6161
}

mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ func.func @main() {
5858
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1)
5959
threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
6060
%val = memref.load %data[%bx, %tx] : memref<2x6xf32>
61-
%reduced0 = gpu.all_reduce add %val {} : (f32) -> (f32)
61+
%reduced0 = gpu.all_reduce add %val uniform {} : (f32) -> (f32)
6262
memref.store %reduced0, %sum[%bx] : memref<2xf32>
63-
%reduced1 = gpu.all_reduce mul %val {} : (f32) -> (f32)
63+
%reduced1 = gpu.all_reduce mul %val uniform {} : (f32) -> (f32)
6464
memref.store %reduced1, %mul[%bx] : memref<2xf32>
6565
gpu.terminator
6666
}

0 commit comments

Comments
 (0)