From f976a2e809af31846dd1181d14531ac669a6b122 Mon Sep 17 00:00:00 2001 From: ergawy Date: Thu, 22 Aug 2024 04:26:25 -0500 Subject: [PATCH] [flang][OpenMP][DoConcurrent] Support `fir.shape_shift` values --- .../OpenMP/DoConcurrentConversion.cpp | 238 +++++++++++------- .../Transforms/DoConcurrent/basic_device.f90 | 10 +- .../DoConcurrent/runtime_sized_array.f90 | 13 +- 3 files changed, 167 insertions(+), 94 deletions(-) diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index b1fb2d8431a530..8c0cf026d87e04 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -158,6 +158,36 @@ mlir::Value calculateTripCount(fir::FirOpBuilder &builder, mlir::Location loc, return tripCount; } +mlir::Value mapTemporaryValue(fir::FirOpBuilder &builder, + mlir::omp::TargetOp targetOp, mlir::Value val, + std::string name = "") { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointAfterValue(val); + auto copyVal = builder.createTemporary(val.getLoc(), val.getType()); + builder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); + + llvm::SmallVector bounds; + builder.setInsertionPoint(targetOp); + mlir::Value mapOp = createMapInfoOp( + builder, copyVal.getLoc(), copyVal, + /*varPtrPtr=*/mlir::Value{}, name, bounds, + /*members=*/llvm::SmallVector{}, + /*membersIndex=*/mlir::ArrayAttr{}, + static_cast>( + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), + mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); + targetOp.getMapVarsMutable().append(mapOp); + + mlir::Region &targetRegion = targetOp.getRegion(); + mlir::Block *targetEntryBlock = &targetRegion.getBlocks().front(); + mlir::Value clonedValArg = + targetRegion.addArgument(copyVal.getType(), copyVal.getLoc()); + builder.setInsertionPointToStart(targetEntryBlock); + auto loadOp = + builder.create(clonedValArg.getLoc(), clonedValArg); + return 
loadOp.getResult(); +} + /// Check if cloning the bounds introduced any dependency on the outer region. /// If so, then either clone them as well if they are MemoryEffectFree, or else /// copy them to a new temporary and add them to the map and block_argument @@ -186,31 +216,9 @@ void cloneOrMapRegionOutsiders(fir::FirOpBuilder &builder, return use.getOwner()->getBlock() == targetEntryBlock; }); } else { - mlir::OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPointAfter(valOp); - auto copyVal = builder.createTemporary(val.getLoc(), val.getType()); - builder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); - - llvm::SmallVector bounds; - std::stringstream name; - builder.setInsertionPoint(targetOp); - mlir::Value mapOp = createMapInfoOp( - builder, copyVal.getLoc(), copyVal, - /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, - /*members=*/llvm::SmallVector{}, - /*membersIndex=*/mlir::ArrayAttr{}, - static_cast< - std::underlying_type_t>( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), - mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); - targetOp.getMapVarsMutable().append(mapOp); - mlir::Value clonedValArg = - targetRegion.addArgument(copyVal.getType(), copyVal.getLoc()); - builder.setInsertionPointToStart(targetEntryBlock); - auto loadOp = - builder.create(clonedValArg.getLoc(), clonedValArg); + mlir::Value mappedTemp = mapTemporaryValue(builder, targetOp, val); val.replaceUsesWithIf( - loadOp->getResult(0), [targetEntryBlock](mlir::OpOperand &use) { + mappedTemp, [targetEntryBlock](mlir::OpOperand &use) { return use.getOwner()->getBlock() == targetEntryBlock; }); } @@ -754,17 +762,18 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { if (mapToDevice) { mlir::omp::TargetOperands targetClauseOps; + LiveInShapeInfoMap liveInShapeInfoMap; // The outermost loop will contain all the live-in values in all nested // loops since live-in values are collected recursively for all nested // ops. 
for (mlir::Value liveIn : loopNestLiveIns) { - targetClauseOps.mapVars.push_back( - genMapInfoOpForLiveIn(rewriter, liveIn, liveInToName)); + targetClauseOps.mapVars.push_back(genMapInfoOpForLiveIn( + rewriter, liveIn, liveInToName, liveInShapeInfoMap[liveIn])); } targetOp = genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns, - targetClauseOps); + targetClauseOps, liveInShapeInfoMap); genTeamsOp(doLoop.getLoc(), rewriter); } @@ -816,42 +825,76 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { } private: - void genBoundsOps(mlir::ConversionPatternRewriter &rewriter, - mlir::Location loc, mlir::Value shape, - llvm::SmallVectorImpl &boundsOps) const { + struct TargetDeclareShapeCreationInfo { + std::vector startIndices{}; + std::vector extents{}; + + bool isShapedValue() const { return !extents.empty(); } + bool isShapeShiftedValue() const { return !startIndices.empty(); } + }; + + using LiveInShapeInfoMap = + llvm::DenseMap; + + void + genBoundsOps(mlir::ConversionPatternRewriter &rewriter, mlir::Location loc, + mlir::Value shape, llvm::SmallVectorImpl &boundsOps, + TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const { if (shape == nullptr) { return; } auto shapeOp = mlir::dyn_cast_if_present(shape.getDefiningOp()); + auto shapeShiftOp = + mlir::dyn_cast_if_present(shape.getDefiningOp()); - if (shapeOp == nullptr) - TODO(loc, "Shapes not defined by shape op's are not supported yet."); + if (shapeOp == nullptr && shapeShiftOp == nullptr) + TODO(loc, + "Shapes not defined by `fir.shape` or `fir.shape_shift` op's are " + "not supported yet."); - auto extents = shapeOp.getExtents(); + auto extents = shapeOp != nullptr + ? 
std::vector(shapeOp.getExtents().begin(), + shapeOp.getExtents().end()) + : shapeShiftOp.getExtents(); - auto genBoundsOp = [&](mlir::Value extent) { - mlir::Type extentType = extent.getType(); - auto lb = rewriter.create( - loc, extentType, rewriter.getIntegerAttr(extentType, 0)); - // TODO I think this caluclation might not be correct. But this is how - // it is done in PFT->OpenMP lowering. So keeping it like this until we - // double check. - mlir::Value ub = rewriter.create(loc, extent, lb); + mlir::Type idxType = extents.front().getType(); + + auto one = rewriter.create( + loc, idxType, rewriter.getIntegerAttr(idxType, 1)); + // For non-shifted values, the starting index is the default Fortran + // value: 1. + std::vector startIndices = + shapeOp != nullptr ? std::vector(extents.size(), one) + : shapeShiftOp.getOrigins(); + + auto genBoundsOp = [&](mlir::Value startIndex, mlir::Value extent) { + // We map the entire range of data by default, therefore, we always map + // from the start.
+ auto normalizedLB = rewriter.create( + loc, idxType, rewriter.getIntegerAttr(idxType, 0)); + + mlir::Value ub = rewriter.create(loc, extent, one); return rewriter.create( - loc, rewriter.getType(), lb, ub, extent, - mlir::Value{}, false, mlir::Value{}); + loc, rewriter.getType(), normalizedLB, ub, + extent, + /*stride=*/mlir::Value{}, /*stride_in_bytes=*/false, startIndex); }; - for (auto extent : extents) - boundsOps.push_back(genBoundsOp(extent)); + for (auto [startIndex, extent] : llvm::zip_equal(startIndices, extents)) + boundsOps.push_back(genBoundsOp(startIndex, extent)); + + if (shapeShiftOp != nullptr) + targetShapeCreationInfo.startIndices = std::move(startIndices); + targetShapeCreationInfo.extents = std::move(extents); } mlir::omp::MapInfoOp genMapInfoOpForLiveIn( mlir::ConversionPatternRewriter &rewriter, mlir::Value liveIn, - const llvm::DenseMap &liveInToName) const { + const llvm::DenseMap &liveInToName, + TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const { mlir::Value rawAddr = liveIn; mlir::Value shape = nullptr; std::string name = ""; @@ -898,7 +941,8 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { } llvm::SmallVector boundsOps; - genBoundsOps(rewriter, liveIn.getLoc(), shape, boundsOps); + genBoundsOps(rewriter, liveIn.getLoc(), shape, boundsOps, + targetShapeCreationInfo); return Fortran::lower::omp::internal::createMapInfoOp( rewriter, liveIn.getLoc(), rawAddr, @@ -911,11 +955,12 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { captureKind, rawAddr.getType()); } - mlir::omp::TargetOp genTargetOp(mlir::Location loc, - mlir::ConversionPatternRewriter &rewriter, - mlir::IRMapping &mapper, - llvm::ArrayRef liveIns, - mlir::omp::TargetOperands &clauseOps) const { + mlir::omp::TargetOp + genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, + mlir::IRMapping &mapper, + const llvm::ArrayRef liveIns, + const mlir::omp::TargetOperands &clauseOps, + const LiveInShapeInfoMap 
&liveInShapeInfoMap) const { auto targetOp = rewriter.create(loc, clauseOps); mlir::Region &region = targetOp.getRegion(); @@ -930,14 +975,17 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { } rewriter.createBlock(&region, {}, liveInTypes, liveInLocs); - fir::FirOpBuilder firBuilder( + fir::FirOpBuilder builder( rewriter, fir::getKindMapping(targetOp->getParentOfType())); - for (auto [liveIn, arg, mapInfoOp] : - llvm::zip_equal(liveIns, region.getArguments(), clauseOps.mapVars)) { + size_t argIdx = 0; + for (auto [liveIn, mapInfoOp] : + llvm::zip_equal(liveIns, clauseOps.mapVars)) { auto miOp = mlir::cast(mapInfoOp.getDefiningOp()); - hlfir::DeclareOp liveInDeclare = genLiveInDeclare(rewriter, arg, miOp); + hlfir::DeclareOp liveInDeclare = + genLiveInDeclare(builder, targetOp, region.getArgument(argIdx), miOp, + liveInShapeInfoMap.at(liveIn)); // TODO If `liveIn.getDefiningOp()` is a `fir::BoxAddrOp`, we probably // need to "unpack" the box by getting the defining op of it's value. @@ -945,9 +993,8 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { // todo for now.
if (!llvm::isa(liveIn.getType())) - mapper.map(liveIn, - firBuilder.loadIfRef(liveIn.getLoc(), - liveInDeclare.getOriginalBase())); + mapper.map(liveIn, builder.loadIfRef(liveIn.getLoc(), + liveInDeclare.getOriginalBase())); else mapper.map(liveIn, liveInDeclare.getOriginalBase()); @@ -955,56 +1002,75 @@ class DoConcurrentConversion : public mlir::OpConversionPattern { liveIn.getDefiningOp())) { mapper.map(origDeclareOp.getBase(), liveInDeclare.getBase()); } + ++argIdx; } - Fortran::lower::omp::internal::cloneOrMapRegionOutsiders(firBuilder, - targetOp); + Fortran::lower::omp::internal::cloneOrMapRegionOutsiders(builder, targetOp); rewriter.setInsertionPoint( rewriter.create(targetOp.getLoc())); return targetOp; } - hlfir::DeclareOp - genLiveInDeclare(mlir::ConversionPatternRewriter &rewriter, - mlir::Value liveInArg, - mlir::omp::MapInfoOp liveInMapInfoOp) const { + hlfir::DeclareOp genLiveInDeclare( + fir::FirOpBuilder &builder, mlir::omp::TargetOp targetOp, + mlir::Value liveInArg, mlir::omp::MapInfoOp liveInMapInfoOp, + const TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const { mlir::Type liveInType = liveInArg.getType(); + std::string liveInName = liveInMapInfoOp.getName().has_value() + ? 
liveInMapInfoOp.getName().value().str() + : std::string(""); if (fir::isa_ref_type(liveInType)) liveInType = fir::unwrapRefType(liveInType); mlir::Value shape = [&]() -> mlir::Value { - if (hlfir::isFortranScalarNumericalType(liveInType)) + if (!targetShapeCreationInfo.isShapedValue()) return {}; - if (hlfir::isFortranArrayObject(liveInType)) { - llvm::SmallVector shapeOpOperands; + llvm::SmallVector extentOperands; + llvm::SmallVector startIndexOperands; + + if (targetShapeCreationInfo.isShapeShiftedValue()) { + llvm::SmallVector shapeShiftOperands; + + size_t shapeIdx = 0; + for (auto [startIndex, extent] : + llvm::zip_equal(targetShapeCreationInfo.startIndices, + targetShapeCreationInfo.extents)) { + shapeShiftOperands.push_back( + Fortran::lower::omp::internal::mapTemporaryValue( + builder, targetOp, startIndex, + liveInName + ".start_idx.dim" + std::to_string(shapeIdx))); + shapeShiftOperands.push_back( + Fortran::lower::omp::internal::mapTemporaryValue( + builder, targetOp, extent, + liveInName + ".extent.dim" + std::to_string(shapeIdx))); + ++shapeIdx; + } - for (auto boundsOperand : liveInMapInfoOp.getBounds()) { - auto boundsOp = - mlir::cast(boundsOperand.getDefiningOp()); - mlir::Operation *localExtentDef = - boundsOp.getExtent().getDefiningOp()->clone(); - rewriter.getInsertionBlock()->push_back(localExtentDef); - assert(localExtentDef->getNumResults() == 1); + auto shapeShiftType = fir::ShapeShiftType::get( + builder.getContext(), shapeShiftOperands.size() / 2); + return builder.create( + liveInArg.getLoc(), shapeShiftType, shapeShiftOperands); + } - shapeOpOperands.push_back(localExtentDef->getResult(0)); - } + llvm::SmallVector shapeOperands; - return rewriter.create(liveInArg.getLoc(), - shapeOpOperands); + size_t shapeIdx = 0; + for (auto extent : targetShapeCreationInfo.extents) { + shapeOperands.push_back( + Fortran::lower::omp::internal::mapTemporaryValue( + builder, targetOp, extent, + liveInName + ".extent.dim" + std::to_string(shapeIdx))); + 
++shapeIdx; } - std::string opStr; - llvm::raw_string_ostream opOs(opStr); - opOs << "Unsupported type: " << liveInType; - llvm_unreachable(opOs.str().c_str()); + return builder.create(liveInArg.getLoc(), shapeOperands); }(); - return rewriter.create(liveInArg.getLoc(), liveInArg, - liveInMapInfoOp.getName().value(), - shape); + return builder.create(liveInArg.getLoc(), liveInArg, + liveInName, shape); } mlir::omp::TeamsOp diff --git a/flang/test/Transforms/DoConcurrent/basic_device.f90 b/flang/test/Transforms/DoConcurrent/basic_device.f90 index 11eaf60e43dd14..433a204c49f952 100644 --- a/flang/test/Transforms/DoConcurrent/basic_device.f90 +++ b/flang/test/Transforms/DoConcurrent/basic_device.f90 @@ -27,12 +27,14 @@ program do_concurrent_basic ! CHECK-DAG: %[[UB_MAP_INFO:.*]] = omp.map.info {{.*}} !fir.ref {name = "loop.0.ub"} ! CHECK-DAG: %[[STEP_MAP_INFO:.*]] = omp.map.info {{.*}} !fir.ref {name = "loop.0.step"} + ! CHECK: %[[C1:.*]] = arith.constant 1 : index ! CHECK: %[[C0:.*]] = arith.constant 0 : index - ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %[[C0]] : index + ! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %[[C1]] : index ! CHECK: %[[A_BOUNDS:.*]] = omp.map.bounds lower_bound(%[[C0]] : index) ! CHECK-SAME: upper_bound(%[[UPPER_BOUND]] : index) ! CHECK-SAME: extent(%[[A_EXTENT]] : index) + ! CHECK-SAME: start_idx(%[[C1]] : index) ! CHECK-DAG: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#1 : {{[^(]+}}) ! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]]) @@ -45,6 +47,9 @@ program do_concurrent_basic ! CHECK-SAME: %[[STEP_MAP_INFO]] -> %[[STEP_ARG:.[[:alnum:]]+]], ! CHECK-SAME: %[[I_MAP_INFO]] -> %[[I_ARG:[[:alnum:]]+]], ! CHECK-SAME: %[[A_MAP_INFO]] -> %[[A_ARG:.[[:alnum:]]+]] + ! CHECK-SAME: %[[A_EXT:.*]] -> %[[A_EXT_ARG:.[[:alnum:]]+]] + + ! CHECK: %[[A_EXT:.*]] = fir.load %[[A_EXT_ARG]] : !fir.ref ! CHECK: %[[LB_DEV_DECL:.*]]:2 = hlfir.declare %[[LB_ARG]] ! 
CHECK: %[[LB_DEV_VAL:.*]] = fir.load %[[LB_DEV_DECL]]#1 @@ -55,7 +60,8 @@ program do_concurrent_basic ! CHECK: %[[STEP_DEV_DECL:.*]]:2 = hlfir.declare %[[STEP_ARG]] ! CHECK: %[[STEP_DEV_VAL:.*]] = fir.load %[[STEP_DEV_DECL]]#1 - ! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]] + ! CHECK: %[[A_SHAPE:.*]] = fir.shape %[[A_EXT]] : (index) -> !fir.shape<1> + ! CHECK: %[[A_DEV_DECL:.*]]:2 = hlfir.declare %[[A_ARG]](%[[A_SHAPE]]) ! CHECK: omp.teams { ! CHECK-NEXT: omp.parallel { diff --git a/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 index 69ad78822b975b..74610e69682ce6 100644 --- a/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 +++ b/flang/test/Transforms/DoConcurrent/runtime_sized_array.f90 @@ -23,7 +23,10 @@ subroutine foo(n) ! CHECK-DAG: %[[I_MAP:.*]] = omp.map.info var_ptr(%[[I_DECL]]#1 : {{.*}}) ! CHECK-DAG: %[[A_MAP:.*]] = omp.map.info var_ptr(%[[A_DECL]]#1 : {{.*}}) -! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%[[N_ALLOC]] : {{.*}}) +! CHECK-DAG: %[[LOOP_LB_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}) {{.*}} {name = "loop.0.lb"} +! CHECK-DAG: %[[LOOP_UB_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}) {{.*}} {name = "loop.0.ub"} +! CHECK-DAG: %[[LOOP_STEP_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}) {{.*}} {name = "loop.0.step"} +! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}) ! CHECK: omp.target ! CHECK-SAME: map_entries(%{{[^[:space:]]+}} -> %[[LB_ARG:arg[0-9]*]], @@ -34,12 +37,10 @@ subroutine foo(n) ! CHECK-SAME: %[[N_MAP]] -> %[[N_ARG:arg[0-9]*]] : {{.*}}) ! CHECK-SAME: { +! CHECK-DAG: %[[N_VAL:.*]] = fir.load %[[N_ARG]] +! CHECK-DAG: %[[A_SHAPE:.*]] = fir.shape %[[N_VAL]] : (index) -> !fir.shape<1> ! CHECK-DAG: %{{.*}} = hlfir.declare %[[I_ARG]] -! CHECK-DAG: %{{.*}} = hlfir.declare %[[A_ARG]] -! CHECK-DAG: %{{.*}} = fir.load %[[N_ARG]] +! CHECK-DAG: %{{.*}} = hlfir.declare %[[A_ARG]](%[[A_SHAPE]]) ! CHECK: omp.terminator ! 
CHECK: } - - -