Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hoist do concurrent nest bounds/steps outside the nest (#114020) #198

Merged
merged 1 commit into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 30 additions & 11 deletions flang/lib/Lower/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2131,18 +2131,37 @@ class FirConverter : public Fortran::lower::AbstractConverter {
llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
assert(!incrementLoopNestInfo.empty() && "empty loop nest");
mlir::Location loc = toLocation();
mlir::Operation *boundsAndStepIP = nullptr;

for (IncrementLoopInfo &info : incrementLoopNestInfo) {
info.loopVariable =
genLoopVariableAddress(loc, *info.loopVariableSym, info.isUnordered);
mlir::Value lowerValue = genControlValue(info.lowerExpr, info);
mlir::Value upperValue = genControlValue(info.upperExpr, info);
bool isConst = true;
mlir::Value stepValue = genControlValue(
info.stepExpr, info, info.isStructured() ? nullptr : &isConst);
// Use a temp variable for unstructured loops with non-const step.
if (!isConst) {
info.stepVariable = builder->createTemporary(loc, stepValue.getType());
builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
mlir::Value lowerValue;
mlir::Value upperValue;
mlir::Value stepValue;

{
mlir::OpBuilder::InsertionGuard guard(*builder);

// Set the IP before the first loop in the nest so that all nest bounds
// and step values are created outside the nest.
if (boundsAndStepIP)
builder->setInsertionPointAfter(boundsAndStepIP);

info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
info.isUnordered);
lowerValue = genControlValue(info.lowerExpr, info);
upperValue = genControlValue(info.upperExpr, info);
bool isConst = true;
stepValue = genControlValue(info.stepExpr, info,
info.isStructured() ? nullptr : &isConst);
boundsAndStepIP = stepValue.getDefiningOp();

// Use a temp variable for unstructured loops with non-const step.
if (!isConst) {
info.stepVariable =
builder->createTemporary(loc, stepValue.getType());
boundsAndStepIP =
builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable);
}
}

// Structured loop - generate fir.do_loop.
Expand Down
102 changes: 102 additions & 0 deletions flang/test/Lower/do_concurrent.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s

! Simple tests for lowering of do concurrent loops with loop-control
! (structured loop nests and an unstructured loop nest).

!> Return the sum of the two integer arguments.
!> The loop nests in this file call it to obtain a non-constant upper bound.
pure function bar(n, m) result(total)
  implicit none
  integer, intent(in) :: n, m
  integer :: total

  total = n + m
end function bar

! sub1: a single do concurrent construct with three index names (i, j, k).
! The FileCheck directives below expect the converted lower and upper bounds
! of all three indices to be matched before the first fir.do_loop, i.e. the
! bounds of the whole nest are produced ahead of the outermost loop.
!CHECK-LABEL: sub1
subroutine sub1(n)
implicit none
! NOTE(review): m is declared here but never assigned before it is read in
! the arguments of bar below; presumably acceptable because this test only
! inspects the lowered IR and is never executed - confirm.
integer :: n, m, i, j, k
integer, dimension(n) :: a
! Bounds of index i (1 to n).
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
!CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index

! Bounds of index j (1 to the result of calling bar).
!CHECK: %[[LB2:.*]] = arith.constant 1 : i32
!CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index
!CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32
!CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index

! Bounds of index k (constants 5 to 10).
!CHECK: %[[LB3:.*]] = arith.constant 5 : i32
!CHECK: %[[LB3_CVT:.*]] = fir.convert %[[LB3]] : (i32) -> index
!CHECK: %[[UB3:.*]] = arith.constant 10 : i32
!CHECK: %[[UB3_CVT:.*]] = fir.convert %[[UB3]] : (i32) -> index

! The three loops must consume exactly the bound values captured above.
!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
!CHECK: fir.do_loop %{{.*}} = %[[LB3_CVT]] to %[[UB3_CVT]] step %{{.*}} unordered

do concurrent(i=1:n, j=1:bar(n*m, n/m), k=5:10)
a(i) = n
end do
end subroutine

! sub2: two separate do concurrent constructs, one nested in the body of the
! other, each with a single index name. In contrast to a single multi-index
! construct, the FileCheck directives below expect the inner loop's bounds to
! be matched after the outer fir.do_loop, and before the inner fir.do_loop.
!CHECK-LABEL: sub2
subroutine sub2(n)
implicit none
! NOTE(review): m is declared here but never assigned before it is read in
! the arguments of bar below; presumably acceptable because this test only
! inspects the lowered IR and is never executed - confirm.
integer :: n, m, i, j
integer, dimension(n) :: a
! Outer loop bounds (1 to n). The operand of fir.load is matched with a
! wildcard instead of a literal SSA number so that unrelated changes to the
! preceding IR do not break this test.
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index
!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
!CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index
!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered
! Inner loop bounds (1 to the result of calling bar), matched after the outer loop.
!CHECK: %[[LB2:.*]] = arith.constant 1 : i32
!CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index
!CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32
!CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index
!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered
do concurrent(i=1:n)
do concurrent(j=1:bar(n*m, n/m))
a(i) = n
end do
end do
end subroutine


! unstructured: a two-index do concurrent construct whose body contains a
! computed goto; the directives below expect branch-based (cf.br / cf.cond_br)
! control flow rather than fir.do_loop. They verify that the bounds and step
! of both indices are matched before the first branch, i.e. nothing belonging
! to the inner index is produced after entering the outer loop header.
!CHECK-LABEL: unstructured
subroutine unstructured(inner_step)
integer(4) :: i, j, inner_step

! Control values of index i (1 to 5, step 1), converted to i16 because the
! construct declares its index type as integer(2). The conversions reference
! the captured constants instead of literal SSA names so that a renumbering
! or renaming of values in the output does not break the test.
!CHECK-NOT: cf.br
!CHECK-NOT: cf.cond_br
!CHECK: %[[LB1:.*]] = arith.constant 1 : i32
!CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> i16
!CHECK: %[[UB1:.*]] = arith.constant 5 : i32
!CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> i16
!CHECK: %[[STP1:.*]] = arith.constant 1 : i16

! Control values of index j (3 to 9, step inner_step). The step is not a
! constant, so the directives expect it to be stored to memory before the
! branch to the first loop header.
!CHECK-NOT: cf.br
!CHECK-NOT: cf.cond_br
!CHECK: %[[LB2:.*]] = arith.constant 3 : i32
!CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> i16
!CHECK: %[[UB2:.*]] = arith.constant 9 : i32
!CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> i16
!CHECK: %[[STP2:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
!CHECK: %[[STP2_CVT:.*]] = fir.convert %[[STP2]] : (i32) -> i16
!CHECK: fir.store %[[STP2_CVT]] to %{{.*}} : !fir.ref<i16>
!CHECK: cf.br ^[[I_LOOP_HEADER:.*]]

! Header of the i loop: loads a counter and branches on whether it is > 0.
!CHECK: ^[[I_LOOP_HEADER]]:
!CHECK-NEXT: %{{.*}} = fir.load %{{.*}} : !fir.ref<i16>
!CHECK-NEXT: %{{.*}} = arith.constant 0 : i16
!CHECK-NEXT: %{{.*}} = arith.cmpi sgt, %{{.*}}, %{{.*}}: i16
!CHECK-NEXT: cf.cond_br %{{.*}}, ^[[J_LOOP_HEADER:.*]], ^{{.*}}

! Header of the j loop: the range/step arithmetic must reuse the values
! captured before the nest (UB2_CVT, LB2_CVT, STP2_CVT).
!CHECK: ^[[J_LOOP_HEADER]]:
!CHECK-NEXT: %[[RANGE:.*]] = arith.subi %[[UB2_CVT]], %[[LB2_CVT]] : i16
!CHECK-NEXT: %{{.*}} = arith.addi %[[RANGE]], %[[STP2_CVT]] : i16
!CHECK-NEXT: %{{.*}} = arith.divsi %{{.*}}, %[[STP2_CVT]] : i16
do concurrent (integer(2)::i=1:5, j=3:9:inner_step, i.ne.3)
! NOTE(review): the computed goto is presumably what makes this loop nest
! unstructured - confirm against the lowering code.
goto (7, 7) i+1
print*, 'E:', i, j
7 continue
enddo
end subroutine unstructured
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %t/partially_nested.f90 -o - \
! RUN: | FileCheck %s --check-prefixes=DEVICE,COMMON

! This is temporarily disabled since the IR for `do concurrent` loops is different after
! https://github.com/llvm/llvm-project/pull/114020. This will be enabled again soon.
! XFAIL: true

!--- multi_range.f90
program main
integer, parameter :: n = 10
Expand Down