Skip to content

Commit

Permalink
Hoist do concurrent nest bounds/steps outside the nest (llvm#114020)
Browse files Browse the repository at this point in the history
If you have the following multi-range `do concurrent` loop:

```fortran
  do concurrent(i=1:n, j=1:bar(n*m, n/m))
    a(i) = n
  end do
```

Currently, flang generates the following IR:

```mlir
    fir.do_loop %arg1 = %42 to %44 step %c1 unordered {
      ...
      %53:3 = hlfir.associate %49 {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
      %54:3 = hlfir.associate %52 {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
      %55 = fir.call @_QFPbar(%53#1, %54#1) fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32
      hlfir.end_associate %53#1, %53#2 : !fir.ref<i32>, i1
      hlfir.end_associate %54#1, %54#2 : !fir.ref<i32>, i1
      %56 = fir.convert %55 : (i32) -> index
      ...
      fir.do_loop %arg2 = %46 to %56 step %c1_4 unordered {
        ...
      }
    }
```

However, if `bar` is impure, then we have a direct violation of the
standard:

```
C1143 A reference to an impure procedure shall not appear within a DO CONCURRENT construct.
```

Moreover, the standard describes the execution of a `do concurrent`
construct in multiple stages:

```
11.1.7.4 Execution of a DO construct
...
11.1.7.4.2 DO CONCURRENT loop control
The concurrent-limit and concurrent-step expressions in the concurrent-control-list are evaluated. ...

11.1.7.4.3 The execution cycle
...
The block of a DO CONCURRENT construct is executed for every active combination of the index-name values.
Each execution of the block is an iteration. The executions may occur in any order.
```

From the above two points, it seems to me that execution is divided into
multiple consecutive stages: 11.1.7.4.2 is the stage where we evaluate
all control expressions, including the step, and 11.1.7.4.3 is the
stage where the block of the concurrent loop itself is executed for each
active combination of iteration values. This change therefore hoists the
evaluation of all bounds and step values of the nest outside the nest.
  • Loading branch information
ergawy committed Oct 31, 2024
1 parent 59cd748 commit 37880d8
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 11 deletions.
41 changes: 30 additions & 11 deletions flang/lib/Lower/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2131,18 +2131,37 @@ class FirConverter : public Fortran::lower::AbstractConverter {
llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
assert(!incrementLoopNestInfo.empty() && "empty loop nest");
mlir::Location loc = toLocation();
mlir::Operation *boundsAndStepIP = nullptr;

for (IncrementLoopInfo &info : incrementLoopNestInfo) {
info.loopVariable =
genLoopVariableAddress(loc, *info.loopVariableSym, info.isUnordered);
mlir::Value lowerValue = genControlValue(info.lowerExpr, info);
mlir::Value upperValue = genControlValue(info.upperExpr, info);
bool isConst = true;
mlir::Value stepValue = genControlValue(
info.stepExpr, info, info.isStructured() ? nullptr : &isConst);
// Use a temp variable for unstructured loops with non-const step.
if (!isConst) {
info.stepVariable = builder->createTemporary(loc, stepValue.getType());
builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
mlir::Value lowerValue;
mlir::Value upperValue;
mlir::Value stepValue;

{
mlir::OpBuilder::InsertionGuard guard(*builder);

// Set the IP before the first loop in the nest so that all nest bounds
// and step values are created outside the nest.
if (boundsAndStepIP)
builder->setInsertionPointAfter(boundsAndStepIP);

info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
info.isUnordered);
lowerValue = genControlValue(info.lowerExpr, info);
upperValue = genControlValue(info.upperExpr, info);
bool isConst = true;
stepValue = genControlValue(info.stepExpr, info,
info.isStructured() ? nullptr : &isConst);
boundsAndStepIP = stepValue.getDefiningOp();

// Use a temp variable for unstructured loops with non-const step.
if (!isConst) {
info.stepVariable =
builder->createTemporary(loc, stepValue.getType());
boundsAndStepIP =
builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
}
}

// Structured loop - generate fir.do_loop.
Expand Down
102 changes: 102 additions & 0 deletions flang/test/Lower/do_concurrent.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s

! Simple tests for structured and unstructured concurrent loops with loop-control.

! Pure helper referenced from loop-bound expressions in this file.
! Returns the sum of its two integer arguments.
pure function bar(n, m) result(total)
  implicit none
  integer, intent(in) :: n, m
  integer :: total

  total = n + m
end function bar

!CHECK-LABEL: sub1
! Lowers a single do concurrent construct with three ranges (i, j, k); the
! upper bound of the second range is a call to the pure function bar.
! The directives below expect the lower and upper bound values of all three
! ranges to appear first, followed by the three nested unordered fir.do_loop
! operations that consume them.
! Note: m is declared but never assigned in this subroutine; the test only
! inspects the emitted IR (see the RUN line), it is not executed.
subroutine sub1(n)
implicit none
integer :: n, m, i, j, k
integer, dimension(n) :: a
! Bounds of the first range: i = 1:n.
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
!CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index

! Bounds of the second range: j = 1:bar(n*m, n/m).
!CHECK: %[[LB2:.*]] = arith.constant 1 : i32
!CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index
!CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32
!CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index

! Bounds of the third range: k = 5:10.
!CHECK: %[[LB3:.*]] = arith.constant 5 : i32
!CHECK: %[[LB3_CVT:.*]] = fir.convert %[[LB3]] : (i32) -> index
!CHECK: %[[UB3:.*]] = arith.constant 10 : i32
!CHECK: %[[UB3_CVT:.*]] = fir.convert %[[UB3]] : (i32) -> index

! The loops themselves, expected only after every bound above.
!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
!CHECK: fir.do_loop %{{.*}} = %[[LB3_CVT]] to %[[UB3_CVT]] step %{{.*}} unordered

do concurrent(i=1:n, j=1:bar(n*m, n/m), k=5:10)
a(i) = n
end do
end subroutine

!CHECK-LABEL: sub2
! Lowers two separate, nested do concurrent constructs (not one multi-range
! construct).  The directives below expect each construct's bounds to be
! emitted immediately before its own fir.do_loop, so the bounds of the inner
! construct (including the call to bar) appear after the outer fir.do_loop.
! Note: m is declared but never assigned in this subroutine; the test only
! inspects the emitted IR (see the RUN line), it is not executed.
subroutine sub2(n)
implicit none
integer :: n, m, i, j
integer, dimension(n) :: a
! Outer construct: i = 1:n.  The loaded value is matched with a wildcard
! rather than a fixed SSA number so the test survives value renumbering.
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
!CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index
!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
! Inner construct: j = 1:bar(n*m, n/m).
!CHECK: %[[LB2:.*]] = arith.constant 1 : i32
!CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index
!CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32
!CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index
!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
do concurrent(i=1:n)
do concurrent(j=1:bar(n*m, n/m))
a(i) = n
end do
end do
end subroutine


!CHECK-LABEL: unstructured
! Lowers a two-range do concurrent construct (with a mask) whose body
! contains a computed goto.  The directives below expect branch-based
! lowering (cf.br / cf.cond_br) instead of fir.do_loop, and expect the
! bounds and steps of both ranges to be emitted before the first branch.
! NOTE(review): presumably the computed goto is what forces the unstructured
! lowering path; confirm against the lowering code.
subroutine unstructured(inner_step)
integer(4) :: i, j, inner_step

! First range: i = 1:5 with index type integer(2); no branch may precede it.
! The converts reference the captured constants instead of fixed SSA names.
!CHECK-NOT: cf.br
!CHECK-NOT: cf.cond_br
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> i16
!CHECK: %[[UB1:.*]] = arith.constant 5 : i32
!CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> i16
!CHECK: %[[STP1:.*]] = arith.constant 1 : i16

! Second range: j = 3:9:inner_step.  Still no branch between the two ranges;
! the non-constant step is stored to a temporary before the first branch.
!CHECK-NOT: cf.br
!CHECK-NOT: cf.cond_br
!CHECK: %[[LB2:.*]] = arith.constant 3 : i32
!CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> i16
!CHECK: %[[UB2:.*]] = arith.constant 9 : i32
!CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> i16
!CHECK: %[[STP2:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
!CHECK: %[[STP2_CVT:.*]] = fir.convert %[[STP2]] : (i32) -> i16
!CHECK: fir.store %[[STP2_CVT]] to %{{.*}} : !fir.ref<i16>
!CHECK: cf.br ^[[I_LOOP_HEADER:.*]]

! Header block of the first range's loop: a loaded i16 value is compared
! against zero before branching to the second range's header.
!CHECK: ^[[I_LOOP_HEADER]]:
!CHECK-NEXT: %{{.*}} = fir.load %{{.*}} : !fir.ref<i16>
!CHECK-NEXT: %{{.*}} = arith.constant 0 : i16
!CHECK-NEXT: %{{.*}} = arith.cmpi sgt, %{{.*}}, %{{.*}}: i16
!CHECK-NEXT: cf.cond_br %{{.*}}, ^[[J_LOOP_HEADER:.*]], ^{{.*}}

! Header block of the second range's loop: the arithmetic must reuse the
! hoisted bound and step values captured above.
!CHECK: ^[[J_LOOP_HEADER]]:
!CHECK-NEXT: %[[RANGE:.*]] = arith.subi %[[UB2_CVT]], %[[LB2_CVT]] : i16
!CHECK-NEXT: %{{.*}} = arith.addi %[[RANGE]], %[[STP2_CVT]] : i16
!CHECK-NEXT: %{{.*}} = arith.divsi %{{.*}}, %[[STP2_CVT]] : i16
do concurrent (integer(2)::i=1:5, j=3:9:inner_step, i.ne.3)
goto (7, 7) i+1
print*, 'E:', i, j
7 continue
enddo
end subroutine unstructured

0 comments on commit 37880d8

Please sign in to comment.