Skip to content

Commit f6071e9

Browse files
authored
[FIR][OpenACC] fix loop order with generatePrivateInit (#155002)
When generating a loop nest to initialize a private array, the loop nest should run from the slowest dimension (outermost loop) to the fastest dimension (innermost loop). The shape obtained from a SequenceType is ordered from the fastest to the slowest dimension, so reverse the order when iterating over it. This makes the generated loop nest similar to how array syntax would get lowered.
1 parent 3c609f3 commit f6071e9

File tree

2 files changed

+33
-1
lines changed

2 files changed

+33
-1
lines changed

flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -591,7 +591,8 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
591591
hlfir::AssignOp::create(firBuilder, loc, initVal,
592592
declareOp.getBase());
593593
} else {
594-
for (auto ext : seqTy.getShape()) {
594+
// Generate loop nest from slowest to fastest running dimension
595+
for (auto ext : llvm::reverse(seqTy.getShape())) {
595596
auto lb = firBuilder.createIntegerConstant(loc, idxTy, 0);
596597
auto ub = firBuilder.createIntegerConstant(loc, idxTy, ext - 1);
597598
auto step = firBuilder.createIntegerConstant(loc, idxTy, 1);

flang/test/Lower/OpenACC/acc-reduction.f90

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,14 @@
189189
! CHECK: acc.yield %arg0 : !fir.box<!fir.array<?xi32>>
190190
! CHECK: }
191191

192+
! CHECK-LABEL: acc.reduction.recipe @reduction_add_section_lb0.ub9xlb0.ub19_ref_10x20xi32 : !fir.ref<!fir.array<10x20xi32>> reduction_operator <add> init {
193+
! CHECK: fir.do_loop %arg1 = %c0 to %c19 step %c1 {
194+
! CHECK: fir.do_loop %arg2 = %c0_0 to %c9 step %c1_1 {
195+
! CHECK: } combiner {
196+
! CHECK: fir.do_loop %arg2 = %c0 to %c19 step %c1 {
197+
! CHECK: fir.do_loop %arg3 = %c0_0 to %c9 step %c1_1 {
198+
! CHECK: }
199+
192200
! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_z32 : !fir.ref<complex<f32>> reduction_operator <mul> init {
193201
! CHECK: ^bb0(%{{.*}}: !fir.ref<complex<f32>>):
194202
! CHECK: %[[REAL:.*]] = arith.constant 1.000000e+00 : f32
@@ -1167,6 +1175,29 @@ subroutine acc_reduction_add_static_slice(a)
11671175
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<100xi32>> {name = "a(11:20)"}
11681176
! CHECK: acc.parallel reduction(@reduction_add_section_lb10.ub19_ref_100xi32 -> %[[RED]] : !fir.ref<!fir.array<100xi32>>)
11691177

1178+
subroutine acc_reduction_add_static_slice_2d(a)
1179+
integer :: a(10,20)
1180+
!$acc parallel reduction(+:a(:10,:20))
1181+
!$acc end parallel
1182+
end subroutine
1183+
1184+
! CHECK-LABEL: func.func @_QPacc_reduction_add_static_slice_2d(
1185+
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10x20xi32>> {fir.bindc_name = "a"})
1186+
! CHECK: %[[C10:.*]] = arith.constant 10 : index
1187+
! CHECK: %[[C20:.*]] = arith.constant 20 : index
1188+
! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]]
1189+
! CHECK: %[[LB:.*]] = arith.constant 0 : index
1190+
! CHECK: %[[C1:.*]] = arith.constant 1 : index
1191+
! CHECK: %[[UB9:.*]] = arith.constant 9 : index
1192+
! CHECK: %[[STRIDE1:.*]] = arith.constant 10 : index
1193+
! CHECK: %[[BOUND0:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB9]] : index) extent(%[[C10]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
1194+
! CHECK: %[[UB19:.*]] = arith.constant 19 : index
1195+
! CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB19]] : index) extent(%[[C20]] : index)
1196+
! stride(%[[STRIDE1]] : index) startIdx(%[[C1]] : index)
1197+
! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.array<10x20xi32>>) bounds(%[[BOUND0]], %[[BOUND1]]) ->
1198+
! !fir.ref<!fir.array<10x20xi32>> {name = "a(:10,:20)"}
1199+
! CHECK: acc.parallel reduction(@reduction_add_section_lb0.ub9xlb0.ub19_ref_10x20xi32 -> %[[RED]] : !fir.ref<!fir.array<10x20xi32>>)
1200+
11701201
subroutine acc_reduction_add_dynamic_extent_add(a)
11711202
integer :: a(:)
11721203
!$acc parallel reduction(+:a)

0 commit comments

Comments
 (0)