diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt index e0aebd0714c8f..b85ee7e861a4f 100644 --- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt +++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt @@ -26,6 +26,7 @@ add_flang_library(FlangOpenMPTransforms FIRSupport FortranSupport HLFIRDialect + FortranUtils MLIR_DEPS ${dialect_libs} diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index de3b8d730072f..6c71924000842 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -6,17 +6,23 @@ // //===----------------------------------------------------------------------===// +#include "flang/Optimizer/Builder/DirectivesCommon.h" #include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/HLFIRTools.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/OpenMP/Utils.h" #include "flang/Support/OpenMP-utils.h" +#include "flang/Utils/OpenMP.h" #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/IRMapping.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/RegionUtils.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" namespace flangomp { #define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS @@ -107,6 +113,33 @@ struct InductionVariableInfo { using InductionVariableInfos = llvm::SmallVector; +/// Collect the list of values used inside the loop but defined outside of it. 
+void collectLoopLiveIns(fir::DoConcurrentLoopOp loop, + llvm::SmallVectorImpl &liveIns) { + llvm::SmallDenseSet seenValues; + llvm::SmallPtrSet seenOps; + + for (auto [lb, ub, st] : llvm::zip_equal( + loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) { + liveIns.push_back(lb); + liveIns.push_back(ub); + liveIns.push_back(st); + } + + mlir::visitUsedValuesDefinedAbove( + loop.getRegion(), [&](mlir::OpOperand *operand) { + if (!seenValues.insert(operand->get()).second) + return; + + mlir::Operation *definingOp = operand->get().getDefiningOp(); + // We want to collect ops corresponding to live-ins only once. + if (definingOp && !seenOps.insert(definingOp).second) + return; + + liveIns.push_back(operand->get()); + }); +} + /// Collects values that are local to a loop: "loop-local values". A loop-local /// value is one that is used exclusively inside the loop but allocated outside /// of it. This usually corresponds to temporary values that are used inside the @@ -168,6 +201,52 @@ static void localizeLoopLocalValue(mlir::Value local, mlir::Region &allocRegion, class DoConcurrentConversion : public mlir::OpConversionPattern { +private: + struct TargetDeclareShapeCreationInfo { + // Note: We use `std::vector` (rather than `llvm::SmallVector` as usual) to + // interface more easily with `ShapeShiftOp::getOrigins()` which returns + // `std::vector`. 
+ std::vector startIndices; + std::vector extents; + + TargetDeclareShapeCreationInfo(mlir::Value liveIn) { + mlir::Value shape = nullptr; + mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp(); + auto declareOp = + mlir::dyn_cast_if_present(liveInDefiningOp); + + if (declareOp != nullptr) + shape = declareOp.getShape(); + + if (!shape) + return; + + auto shapeOp = + mlir::dyn_cast_if_present(shape.getDefiningOp()); + auto shapeShiftOp = + mlir::dyn_cast_if_present(shape.getDefiningOp()); + + if (!shapeOp && !shapeShiftOp) + TODO(liveIn.getLoc(), + "Shapes not defined by `fir.shape` or `fir.shape_shift` op's are " + "not supported yet."); + + if (shapeShiftOp != nullptr) + startIndices = shapeShiftOp.getOrigins(); + + extents = shapeOp != nullptr + ? std::vector(shapeOp.getExtents().begin(), + shapeOp.getExtents().end()) + : shapeShiftOp.getExtents(); + } + + bool isShapedValue() const { return !extents.empty(); } + bool isShapeShiftedValue() const { return !startIndices.empty(); } + }; + + using LiveInShapeInfoMap = + llvm::DenseMap; + public: using mlir::OpConversionPattern::OpConversionPattern; @@ -182,10 +261,6 @@ class DoConcurrentConversion mlir::LogicalResult matchAndRewrite(fir::DoConcurrentOp doLoop, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { - if (mapToDevice) - return doLoop.emitError( - "not yet implemented: Mapping `do concurrent` loops to device"); - looputils::InductionVariableInfos ivInfos; auto loop = mlir::cast( doLoop.getRegion().back().getTerminator()); @@ -196,20 +271,72 @@ class DoConcurrentConversion for (mlir::Value indVar : *indVars) ivInfos.emplace_back(loop, indVar); + llvm::SmallVector loopNestLiveIns; + looputils::collectLoopLiveIns(loop, loopNestLiveIns); + assert(!loopNestLiveIns.empty()); + llvm::SetVector locals; looputils::collectLoopLocalValues(loop, locals); + // We do not want to map "loop-local" values to the device through + // `omp.map.info` ops. 
Therefore, we remove them from the list of live-ins. + loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns, + [&](mlir::Value liveIn) { + return locals.contains(liveIn); + }), + loopNestLiveIns.end()); + + mlir::omp::TargetOp targetOp; + mlir::omp::LoopNestOperands loopNestClauseOps; + mlir::IRMapping mapper; + + if (mapToDevice) { + mlir::ModuleOp module = doLoop->getParentOfType(); + bool isTargetDevice = + llvm::cast(*module) + .getIsTargetDevice(); + + mlir::omp::TargetOperands targetClauseOps; + genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper, + loopNestClauseOps, + isTargetDevice ? nullptr : &targetClauseOps); + + LiveInShapeInfoMap liveInShapeInfoMap; + fir::FirOpBuilder builder( + rewriter, + fir::getKindMapping(doLoop->getParentOfType())); + + for (mlir::Value liveIn : loopNestLiveIns) { + targetClauseOps.mapVars.push_back( + genMapInfoOpForLiveIn(builder, liveIn)); + liveInShapeInfoMap.insert( + {liveIn, TargetDeclareShapeCreationInfo(liveIn)}); + } + + targetOp = + genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns, + targetClauseOps, loopNestClauseOps, liveInShapeInfoMap); + genTeamsOp(doLoop.getLoc(), rewriter); + } + mlir::omp::ParallelOp parallelOp = genParallelOp(doLoop.getLoc(), rewriter, ivInfos, mapper); - mlir::omp::LoopNestOperands loopNestClauseOps; - genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper, - loopNestClauseOps); + + // Only set as composite when part of `distribute parallel do`. 
+ parallelOp.setComposite(mapToDevice); + + if (!mapToDevice) + genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper, + loopNestClauseOps); for (mlir::Value local : locals) looputils::localizeLoopLocalValue(local, parallelOp.getRegion(), rewriter); + if (mapToDevice) + genDistributeOp(doLoop.getLoc(), rewriter).setComposite(/*val=*/true); + mlir::omp::LoopNestOp ompLoopNest = genWsLoopOp(rewriter, loop, mapper, loopNestClauseOps, /*isComposite=*/mapToDevice); @@ -284,11 +411,11 @@ class DoConcurrentConversion return result; } - void - genLoopNestClauseOps(mlir::Location loc, - mlir::ConversionPatternRewriter &rewriter, - fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper, - mlir::omp::LoopNestOperands &loopNestClauseOps) const { + void genLoopNestClauseOps( + mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, + fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper, + mlir::omp::LoopNestOperands &loopNestClauseOps, + mlir::omp::TargetOperands *targetClauseOps = nullptr) const { assert(loopNestClauseOps.loopLowerBounds.empty() && "Loop nest bounds were already emitted!"); @@ -297,11 +424,21 @@ class DoConcurrentConversion bounds.push_back(var.getDefiningOp()->getResult(0)); }; + auto hostEvalCapture = [&](mlir::Value var, + llvm::SmallVectorImpl &bounds) { + populateBounds(var, bounds); + + // Ensure that loop-nest bounds are evaluated in the host and forwarded to + // the nested omp constructs when we map to the device. 
+ if (targetClauseOps) + targetClauseOps->hostEvalVars.push_back(var); + }; + for (auto [lb, ub, st] : llvm::zip_equal( loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) { - populateBounds(lb, loopNestClauseOps.loopLowerBounds); - populateBounds(ub, loopNestClauseOps.loopUpperBounds); - populateBounds(st, loopNestClauseOps.loopSteps); + hostEvalCapture(lb, loopNestClauseOps.loopLowerBounds); + hostEvalCapture(ub, loopNestClauseOps.loopUpperBounds); + hostEvalCapture(st, loopNestClauseOps.loopSteps); } loopNestClauseOps.loopInclusive = rewriter.getUnitAttr(); @@ -439,6 +576,247 @@ class DoConcurrentConversion return loopNestOp; } + void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value liveIn, + mlir::Value rawAddr, + llvm::SmallVectorImpl &boundsOps) const { + fir::ExtendedValue extVal = + hlfir::translateToExtendedValue(rawAddr.getLoc(), builder, + hlfir::Entity{liveIn}, + /*contiguousHint=*/ + true) + .first; + fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr( + builder, rawAddr, /*isOptional=*/false, rawAddr.getLoc()); + boundsOps = fir::factory::genImplicitBoundsOps( + builder, info, extVal, + /*dataExvIsAssumedSize=*/false, rawAddr.getLoc()); + } + + mlir::omp::MapInfoOp genMapInfoOpForLiveIn(fir::FirOpBuilder &builder, + mlir::Value liveIn) const { + mlir::Value rawAddr = liveIn; + llvm::StringRef name; + + mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp(); + auto declareOp = + mlir::dyn_cast_if_present(liveInDefiningOp); + + if (declareOp != nullptr) { + // Use the raw address to avoid unboxing `fir.box` values whenever + // possible. Put differently, if we have access to the direct value memory + // reference/address, we use it. 
+ rawAddr = declareOp.getOriginalBase(); + name = declareOp.getUniqName(); + } + + if (!llvm::isa(rawAddr.getType())) { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointAfter(liveInDefiningOp); + auto copyVal = builder.createTemporary(liveIn.getLoc(), liveIn.getType()); + builder.createStoreWithConvert(copyVal.getLoc(), liveIn, copyVal); + rawAddr = copyVal; + } + + mlir::Type liveInType = liveIn.getType(); + mlir::Type eleType = liveInType; + if (auto refType = mlir::dyn_cast(liveInType)) + eleType = refType.getElementType(); + + llvm::omp::OpenMPOffloadMappingFlags mapFlag = + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; + mlir::omp::VariableCaptureKind captureKind = + mlir::omp::VariableCaptureKind::ByRef; + + if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { + captureKind = mlir::omp::VariableCaptureKind::ByCopy; + } else if (!fir::isa_builtin_cptr_type(eleType)) { + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; + mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; + } + + llvm::SmallVector boundsOps; + genBoundsOps(builder, liveIn, rawAddr, boundsOps); + + return Fortran::utils::openmp::createMapInfoOp( + builder, liveIn.getLoc(), rawAddr, + /*varPtrPtr=*/{}, name.str(), boundsOps, + /*members=*/{}, + /*membersIndex=*/mlir::ArrayAttr{}, + static_cast< + std::underlying_type_t>( + mapFlag), + captureKind, rawAddr.getType()); + } + + mlir::omp::TargetOp + genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, + mlir::IRMapping &mapper, llvm::ArrayRef mappedVars, + mlir::omp::TargetOperands &clauseOps, + mlir::omp::LoopNestOperands &loopNestClauseOps, + const LiveInShapeInfoMap &liveInShapeInfoMap) const { + auto targetOp = rewriter.create(loc, clauseOps); + auto argIface = llvm::cast(*targetOp); + + mlir::Region ®ion = targetOp.getRegion(); + + llvm::SmallVector regionArgTypes; + llvm::SmallVector regionArgLocs; + + for (auto var : llvm::concat(clauseOps.hostEvalVars, + 
clauseOps.mapVars)) { + regionArgTypes.push_back(var.getType()); + regionArgLocs.push_back(var.getLoc()); + } + + rewriter.createBlock(®ion, {}, regionArgTypes, regionArgLocs); + fir::FirOpBuilder builder( + rewriter, + fir::getKindMapping(targetOp->getParentOfType())); + + // Within the loop, it is possible that we discover other values that need + // to be mapped to the target region (the shape info values for arrays, for + // example). Therefore, the map block args might be extended and resized. + // Hence, we invoke `argIface.getMapBlockArgs()` every iteration to make + // sure we access the proper vector of data. + int idx = 0; + for (auto [mapInfoOp, mappedVar] : + llvm::zip_equal(clauseOps.mapVars, mappedVars)) { + auto miOp = mlir::cast(mapInfoOp.getDefiningOp()); + hlfir::DeclareOp liveInDeclare = + genLiveInDeclare(builder, targetOp, argIface.getMapBlockArgs()[idx], + miOp, liveInShapeInfoMap.at(mappedVar)); + ++idx; + + // If `mappedVar.getDefiningOp()` is a `fir::BoxAddrOp`, we probably + // need to "unpack" the box by getting the defining op of its value. + // However, we did not hit this case in reality yet so leaving it as a + // todo for now. 
+ if (mlir::isa_and_present(mappedVar.getDefiningOp())) + TODO(mappedVar.getLoc(), + "Mapped variables defined by `BoxAddrOp` are not supported yet"); + + auto mapHostValueToDevice = [&](mlir::Value hostValue, + mlir::Value deviceValue) { + if (!llvm::isa(hostValue.getType())) + mapper.map(hostValue, + builder.loadIfRef(hostValue.getLoc(), deviceValue)); + else + mapper.map(hostValue, deviceValue); + }; + + mapHostValueToDevice(mappedVar, liveInDeclare.getOriginalBase()); + + if (auto origDeclareOp = mlir::dyn_cast_if_present( + mappedVar.getDefiningOp())) + mapHostValueToDevice(origDeclareOp.getBase(), liveInDeclare.getBase()); + } + + for (auto [arg, hostEval] : llvm::zip_equal(argIface.getHostEvalBlockArgs(), + clauseOps.hostEvalVars)) + mapper.map(hostEval, arg); + + for (unsigned i = 0; i < loopNestClauseOps.loopLowerBounds.size(); ++i) { + loopNestClauseOps.loopLowerBounds[i] = + mapper.lookup(loopNestClauseOps.loopLowerBounds[i]); + loopNestClauseOps.loopUpperBounds[i] = + mapper.lookup(loopNestClauseOps.loopUpperBounds[i]); + loopNestClauseOps.loopSteps[i] = + mapper.lookup(loopNestClauseOps.loopSteps[i]); + } + + // Check if cloning the bounds introduced any dependency on the outer + // region. If so, then either clone them as well if they are + // MemoryEffectFree, or else copy them to a new temporary and add them to + // the map and block_argument lists and replace their uses with the new + // temporary. + Fortran::utils::openmp::cloneOrMapRegionOutsiders(builder, targetOp); + rewriter.setInsertionPoint( + rewriter.create(targetOp.getLoc())); + + return targetOp; + } + + hlfir::DeclareOp genLiveInDeclare( + fir::FirOpBuilder &builder, mlir::omp::TargetOp targetOp, + mlir::Value liveInArg, mlir::omp::MapInfoOp liveInMapInfoOp, + const TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const { + mlir::Type liveInType = liveInArg.getType(); + std::string liveInName = liveInMapInfoOp.getName().has_value() + ? 
liveInMapInfoOp.getName().value().str() + : std::string(""); + if (fir::isa_ref_type(liveInType)) + liveInType = fir::unwrapRefType(liveInType); + + mlir::Value shape = [&]() -> mlir::Value { + if (!targetShapeCreationInfo.isShapedValue()) + return {}; + + llvm::SmallVector extentOperands; + llvm::SmallVector startIndexOperands; + + if (targetShapeCreationInfo.isShapeShiftedValue()) { + llvm::SmallVector shapeShiftOperands; + + size_t shapeIdx = 0; + for (auto [startIndex, extent] : + llvm::zip_equal(targetShapeCreationInfo.startIndices, + targetShapeCreationInfo.extents)) { + shapeShiftOperands.push_back( + Fortran::utils::openmp::mapTemporaryValue( + builder, targetOp, startIndex, + liveInName + ".start_idx.dim" + std::to_string(shapeIdx))); + shapeShiftOperands.push_back( + Fortran::utils::openmp::mapTemporaryValue( + builder, targetOp, extent, + liveInName + ".extent.dim" + std::to_string(shapeIdx))); + ++shapeIdx; + } + + auto shapeShiftType = fir::ShapeShiftType::get( + builder.getContext(), shapeShiftOperands.size() / 2); + return builder.create( + liveInArg.getLoc(), shapeShiftType, shapeShiftOperands); + } + + llvm::SmallVector shapeOperands; + size_t shapeIdx = 0; + for (auto extent : targetShapeCreationInfo.extents) { + shapeOperands.push_back(Fortran::utils::openmp::mapTemporaryValue( + builder, targetOp, extent, + liveInName + ".extent.dim" + std::to_string(shapeIdx))); + ++shapeIdx; + } + + return builder.create(liveInArg.getLoc(), shapeOperands); + }(); + + return builder.create(liveInArg.getLoc(), liveInArg, + liveInName, shape); + } + + mlir::omp::TeamsOp + genTeamsOp(mlir::Location loc, + mlir::ConversionPatternRewriter &rewriter) const { + auto teamsOp = rewriter.create( + loc, /*clauses=*/mlir::omp::TeamsOperands{}); + + rewriter.createBlock(&teamsOp.getRegion()); + rewriter.setInsertionPoint(rewriter.create(loc)); + + return teamsOp; + } + + mlir::omp::DistributeOp + genDistributeOp(mlir::Location loc, + mlir::ConversionPatternRewriter 
&rewriter) const { + auto distOp = rewriter.create( + loc, /*clauses=*/mlir::omp::DistributeOperands{}); + + rewriter.createBlock(&distOp.getRegion()); + return distOp; + } + bool mapToDevice; llvm::DenseSet &concurrentLoopsToSkip; mlir::SymbolTable &moduleSymbolTable; diff --git a/flang/test/Transforms/DoConcurrent/basic_device.f90 b/flang/test/Transforms/DoConcurrent/basic_device.f90 new file mode 100644 index 0000000000000..fd13f9c6babe0 --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/basic_device.f90 @@ -0,0 +1,83 @@ +! Tests mapping of a basic `do concurrent` loop to +! `!$omp target teams distribute parallel do`. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s + +program do_concurrent_basic + implicit none + integer :: a(10) + integer :: i + + ! CHECK: %[[I_ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "i"} + ! CHECK: %[[I_ORIG_DECL:.*]]:2 = hlfir.declare %[[I_ORIG_ALLOC]] + + ! CHECK: %[[A_ADDR:.*]] = fir.address_of(@_QFEa) + ! CHECK: %[[A_SHAPE:.*]] = fir.shape %[[A_EXTENT:.*]] : (index) -> !fir.shape<1> + ! CHECK: %[[A_ORIG_DECL:.*]]:2 = hlfir.declare %[[A_ADDR]](%[[A_SHAPE]]) + + ! CHECK-NOT: fir.do_loop + + ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 + ! CHECK: %[[HOST_LB:.*]] = fir.convert %[[C1]] : (i32) -> index + ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 + ! CHECK: %[[HOST_UB:.*]] = fir.convert %[[C10]] : (i32) -> index + ! CHECK: %[[HOST_STEP:.*]] = arith.constant 1 : index + + ! CHECK: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#1 + ! CHECK: %[[C0:.*]] = arith.constant 0 : index + ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %{{c1.*}} : index + + ! CHECK: %[[A_BOUNDS:.*]] = omp.map.bounds lower_bound(%[[C0]] : index) + ! CHECK-SAME: upper_bound(%[[UPPER_BOUND]] : index) + ! CHECK-SAME: extent(%[[A_EXTENT]] : index) + + ! 
CHECK: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#1 : {{[^(]+}}) + ! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]]) + + ! CHECK: omp.target + ! CHECK-SAME: host_eval(%[[HOST_LB]] -> %[[LB:[[:alnum:]]+]], %[[HOST_UB]] -> %[[UB:[[:alnum:]]+]], %[[HOST_STEP]] -> %[[STEP:[[:alnum:]]+]] : index, index, index) + ! CHECK-SAME: map_entries( + ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, + ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, + ! CHECK-SAME: %{{[[:alnum:]]+}} -> %{{[^,]+}}, + ! CHECK-SAME: %[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]], + ! CHECK-SAME: %[[A_MAP_INFO]] -> %[[A_ARG:.[[:alnum:]]+]] + + ! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]] + ! CHECK: omp.teams { + ! CHECK-NEXT: omp.parallel { + + ! CHECK-NEXT: %[[ITER_VAR:.*]] = fir.alloca i32 {bindc_name = "i"} + ! CHECK-NEXT: %[[BINDING:.*]]:2 = hlfir.declare %[[ITER_VAR]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + + ! CHECK-NEXT: omp.distribute { + ! CHECK-NEXT: omp.wsloop { + + ! CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]]) : index = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + ! CHECK-NEXT: %[[IV_IDX:.*]] = fir.convert %[[ARG0]] : (index) -> i32 + ! CHECK-NEXT: fir.store %[[IV_IDX]] to %[[BINDING]]#0 : !fir.ref + ! CHECK-NEXT: %[[IV_VAL1:.*]] = fir.load %[[BINDING]]#0 : !fir.ref + ! CHECK-NEXT: %[[IV_VAL2:.*]] = fir.load %[[BINDING]]#0 : !fir.ref + ! CHECK-NEXT: %[[IV_VAL_I64:.*]] = fir.convert %[[IV_VAL2]] : (i32) -> i64 + ! CHECK-NEXT: %[[ARR_ACCESS:.*]] = hlfir.designate %[[A_DEV_DECL]]#0 (%[[IV_VAL_I64]]) : (!fir.ref>, i64) -> !fir.ref + ! CHECK-NEXT: hlfir.assign %[[IV_VAL1]] to %[[ARR_ACCESS]] : i32, !fir.ref + ! CHECK-NEXT: omp.yield + ! CHECK-NEXT: } + + ! CHECK-NEXT: } {omp.composite} + ! CHECK-NEXT: } {omp.composite} + ! CHECK-NEXT: omp.terminator + ! CHECK-NEXT: } {omp.composite} + ! CHECK-NEXT: omp.terminator + ! CHECK-NEXT: } + ! CHECK-NEXT: omp.terminator + ! 
CHECK-NEXT: } + do concurrent (i=1:10) + a(i) = i + end do + + ! CHECK-NOT: fir.do_loop +end program do_concurrent_basic diff --git a/flang/test/Transforms/DoConcurrent/basic_device.mlir b/flang/test/Transforms/DoConcurrent/basic_device.mlir index 0ca48943864c8..fa511c3d46d58 100644 --- a/flang/test/Transforms/DoConcurrent/basic_device.mlir +++ b/flang/test/Transforms/DoConcurrent/basic_device.mlir @@ -1,4 +1,4 @@ -// RUN: fir-opt --omp-do-concurrent-conversion="map-to=device" -verify-diagnostics %s +// RUN: fir-opt --omp-do-concurrent-conversion="map-to=device" %s -o - | FileCheck %s func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_basic"} { %2 = fir.address_of(@_QFEa) : !fir.ref> @@ -11,8 +11,12 @@ func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_bas %8 = fir.convert %c10_i32 : (i32) -> index %c1 = arith.constant 1 : index - // expected-error@+2 {{not yet implemented: Mapping `do concurrent` loops to device}} - // expected-error@below {{failed to legalize operation 'fir.do_concurrent'}} + // CHECK: omp.target + // CHECK: omp.teams + // CHECK: omp.parallel + // CHECK: omp.distribute + // CHECK: omp.wsloop + // CHECK: omp.loop_nest fir.do_concurrent { %0 = fir.alloca i32 {bindc_name = "i"} %1:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90 b/flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90 new file mode 100644 index 0000000000000..b467747293ace --- /dev/null +++ b/flang/test/Transforms/DoConcurrent/use_loop_bounds_in_body.f90 @@ -0,0 +1,40 @@ +! Tests that when a loop bound is used in the body, that the mapped version of +! the loop bound (rather than the host-eval one) is the one used inside the loop. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! RUN: | FileCheck %s +! RUN: bbc -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=device %s -o - \ +! 
RUN: | FileCheck %s + +subroutine foo(a, n) + implicit none + integer :: i, n + real, dimension(n) :: a + + do concurrent (i=1:n) + a(i) = n + end do +end subroutine + +! CHECK-LABEL: func.func @_QPfoo +! CHECK: omp.target +! CHECK-SAME: host_eval(%{{.*}} -> %{{.*}}, %{{.*}} -> %[[N_HOST_EVAL:.*]], %{{.*}} -> %{{.*}} : index, index, index) +! CHECK-SAME: map_entries({{[^[:space:]]*}} -> {{[^[:space:]]*}}, +! CHECK-SAME: {{[^[:space:]]*}} -> {{[^[:space:]]*}}, {{[^[:space:]]*}} -> {{[^[:space:]]*}}, +! CHECK-SAME: {{[^[:space:]]*}} -> {{[^[:space:]]*}}, {{[^[:space:]]*}} -> %[[N_MAP_ARG:[^[:space:]]*]], {{.*}}) { +! CHECK: %[[N_MAPPED:.*]]:2 = hlfir.declare %[[N_MAP_ARG]] {uniq_name = "_QFfooEn"} +! CHECK: omp.teams { +! CHECK: omp.parallel { +! CHECK: omp.distribute { +! CHECK: omp.wsloop { +! CHECK: omp.loop_nest (%{{.*}}) : index = (%{{.*}}) to (%[[N_HOST_EVAL]]) inclusive step (%{{.*}}) { +! CHECK: %[[N_VAL:.*]] = fir.load %[[N_MAPPED]]#0 : !fir.ref +! CHECK: %[[N_VAL_CVT:.*]] = fir.convert %[[N_VAL]] : (i32) -> f32 +! CHECK: hlfir.assign %[[N_VAL_CVT]] to {{.*}} +! CHECK-NEXT: omp.yield +! CHECK: } +! CHECK: } +! CHECK: } +! CHECK: } +! CHECK: } +! CHECK: }