[flang][OpenMP] Extend do concurrent mapping to device
#155987
Conversation
@llvm/pr-subscribers-flang-fir-hlfir

Author: Kareem Ergawy (ergawy)

Changes: Upstreams further parts of the `do concurrent` to OpenMP conversion pass from AMD's fork, extending the pass with support for mapping to the device.

Patch is 25.39 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155987.diff

4 Files Affected:
diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
index e0aebd0714c8f..b85ee7e861a4f 100644
--- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
@@ -26,6 +26,7 @@ add_flang_library(FlangOpenMPTransforms
FIRSupport
FortranSupport
HLFIRDialect
+ FortranUtils
MLIR_DEPS
${dialect_libs}
diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
index c928b76065ade..e975b86a6ba0d 100644
--- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
+++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
@@ -6,17 +6,22 @@
//
//===----------------------------------------------------------------------===//
+#include "flang/Optimizer/Builder/DirectivesCommon.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "flang/Optimizer/OpenMP/Utils.h"
#include "flang/Support/OpenMP-utils.h"
+#include "flang/Utils/OpenMP.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/RegionUtils.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
namespace flangomp {
#define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS
@@ -107,6 +112,33 @@ struct InductionVariableInfo {
using InductionVariableInfos = llvm::SmallVector<InductionVariableInfo>;
+/// Collect the list of values used inside the loop but defined outside of it.
+void collectLoopLiveIns(fir::DoConcurrentLoopOp loop,
+ llvm::SmallVectorImpl<mlir::Value> &liveIns) {
+ llvm::SmallDenseSet<mlir::Value> seenValues;
+ llvm::SmallDenseSet<mlir::Operation *> seenOps;
+
+ for (auto [lb, ub, st] : llvm::zip_equal(
+ loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) {
+ liveIns.push_back(lb);
+ liveIns.push_back(ub);
+ liveIns.push_back(st);
+ }
+
+ mlir::visitUsedValuesDefinedAbove(
+ loop.getRegion(), [&](mlir::OpOperand *operand) {
+ if (!seenValues.insert(operand->get()).second)
+ return;
+
+ mlir::Operation *definingOp = operand->get().getDefiningOp();
+ // We want to collect ops corresponding to live-ins only once.
+ if (definingOp && !seenOps.insert(definingOp).second)
+ return;
+
+ liveIns.push_back(operand->get());
+ });
+}
+
/// Collects values that are local to a loop: "loop-local values". A loop-local
/// value is one that is used exclusively inside the loop but allocated outside
/// of it. This usually corresponds to temporary values that are used inside the
@@ -182,10 +214,6 @@ class DoConcurrentConversion
mlir::LogicalResult
matchAndRewrite(fir::DoConcurrentOp doLoop, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
- if (mapToDevice)
- return doLoop.emitError(
- "not yet implemented: Mapping `do concurrent` loops to device");
-
looputils::InductionVariableInfos ivInfos;
auto loop = mlir::cast<fir::DoConcurrentLoopOp>(
doLoop.getRegion().back().getTerminator());
@@ -196,20 +224,72 @@ class DoConcurrentConversion
for (mlir::Value indVar : *indVars)
ivInfos.emplace_back(loop, indVar);
+ llvm::SmallVector<mlir::Value> loopNestLiveIns;
+ looputils::collectLoopLiveIns(loop, loopNestLiveIns);
+ assert(!loopNestLiveIns.empty());
+
llvm::SetVector<mlir::Value> locals;
looputils::collectLoopLocalValues(loop, locals);
+ // We do not want to map "loop-local" values to the device through
+ // `omp.map.info` ops. Therefore, we remove them from the list of live-ins.
+ loopNestLiveIns.erase(llvm::remove_if(loopNestLiveIns,
+ [&](mlir::Value liveIn) {
+ return locals.contains(liveIn);
+ }),
+ loopNestLiveIns.end());
+
+ mlir::omp::TargetOp targetOp;
+ mlir::omp::LoopNestOperands loopNestClauseOps;
+
mlir::IRMapping mapper;
+
+ if (mapToDevice) {
+ mlir::ModuleOp module = doLoop->getParentOfType<mlir::ModuleOp>();
+ bool isTargetDevice =
+ llvm::cast<mlir::omp::OffloadModuleInterface>(*module)
+ .getIsTargetDevice();
+
+ mlir::omp::TargetOperands targetClauseOps;
+ genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper,
+ loopNestClauseOps,
+ isTargetDevice ? nullptr : &targetClauseOps);
+
+ LiveInShapeInfoMap liveInShapeInfoMap;
+ fir::FirOpBuilder builder(
+ rewriter,
+ fir::getKindMapping(doLoop->getParentOfType<mlir::ModuleOp>()));
+
+ for (mlir::Value liveIn : loopNestLiveIns) {
+ targetClauseOps.mapVars.push_back(
+ genMapInfoOpForLiveIn(builder, liveIn));
+ liveInShapeInfoMap.insert(
+ {liveIn, TargetDeclareShapeCreationInfo(liveIn)});
+ }
+
+ targetOp =
+ genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns,
+ targetClauseOps, loopNestClauseOps, liveInShapeInfoMap);
+ genTeamsOp(doLoop.getLoc(), rewriter);
+ }
+
mlir::omp::ParallelOp parallelOp =
genParallelOp(doLoop.getLoc(), rewriter, ivInfos, mapper);
- mlir::omp::LoopNestOperands loopNestClauseOps;
- genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper,
- loopNestClauseOps);
+
+ // Only set as composite when part of `distribute parallel do`.
+ parallelOp.setComposite(mapToDevice);
+
+ if (!mapToDevice)
+ genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper,
+ loopNestClauseOps);
for (mlir::Value local : locals)
looputils::localizeLoopLocalValue(local, parallelOp.getRegion(),
rewriter);
+ if (mapToDevice)
+ genDistributeOp(doLoop.getLoc(), rewriter).setComposite(/*val=*/true);
+
mlir::omp::LoopNestOp ompLoopNest =
genWsLoopOp(rewriter, loop, mapper, loopNestClauseOps,
/*isComposite=*/mapToDevice);
@@ -244,6 +324,51 @@ class DoConcurrentConversion
}
private:
+ struct TargetDeclareShapeCreationInfo {
+ // Note: We use `std::vector` (rather than `llvm::SmallVector` as usual) to
+ // interface more easily with `ShapeShiftOp::getOrigins()`, which returns a
+ // `std::vector`.
+ std::vector<mlir::Value> startIndices{};
+ std::vector<mlir::Value> extents{};
+
+ TargetDeclareShapeCreationInfo(mlir::Value liveIn) {
+ mlir::Value shape = nullptr;
+ mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp();
+ auto declareOp =
+ mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp);
+
+ if (declareOp != nullptr)
+ shape = declareOp.getShape();
+
+ if (shape == nullptr)
+ return;
+
+ auto shapeOp =
+ mlir::dyn_cast_if_present<fir::ShapeOp>(shape.getDefiningOp());
+ auto shapeShiftOp =
+ mlir::dyn_cast_if_present<fir::ShapeShiftOp>(shape.getDefiningOp());
+
+ if (shapeOp == nullptr && shapeShiftOp == nullptr)
+ TODO(liveIn.getLoc(),
+ "Shapes not defined by `fir.shape` or `fir.shape_shift` ops are "
+ "not supported yet.");
+
+ if (shapeShiftOp != nullptr)
+ startIndices = shapeShiftOp.getOrigins();
+
+ extents = shapeOp != nullptr
+ ? std::vector<mlir::Value>(shapeOp.getExtents().begin(),
+ shapeOp.getExtents().end())
+ : shapeShiftOp.getExtents();
+ }
+
+ bool isShapedValue() const { return !extents.empty(); }
+ bool isShapeShiftedValue() const { return !startIndices.empty(); }
+ };
+
+ using LiveInShapeInfoMap =
+ llvm::DenseMap<mlir::Value, TargetDeclareShapeCreationInfo>;
+
mlir::omp::ParallelOp
genParallelOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
looputils::InductionVariableInfos &ivInfos,
@@ -284,11 +409,11 @@ class DoConcurrentConversion
return result;
}
- void
- genLoopNestClauseOps(mlir::Location loc,
- mlir::ConversionPatternRewriter &rewriter,
- fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper,
- mlir::omp::LoopNestOperands &loopNestClauseOps) const {
+ void genLoopNestClauseOps(
+ mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
+ fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper,
+ mlir::omp::LoopNestOperands &loopNestClauseOps,
+ mlir::omp::TargetOperands *targetClauseOps = nullptr) const {
assert(loopNestClauseOps.loopLowerBounds.empty() &&
"Loop nest bounds were already emitted!");
@@ -297,11 +422,19 @@ class DoConcurrentConversion
bounds.push_back(var.getDefiningOp()->getResult(0));
};
+ auto hostEvalCapture = [&](mlir::Value var,
+ llvm::SmallVectorImpl<mlir::Value> &bounds) {
+ populateBounds(var, bounds);
+
+ if (targetClauseOps)
+ targetClauseOps->hostEvalVars.push_back(var);
+ };
+
for (auto [lb, ub, st] : llvm::zip_equal(
loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) {
- populateBounds(lb, loopNestClauseOps.loopLowerBounds);
- populateBounds(ub, loopNestClauseOps.loopUpperBounds);
- populateBounds(st, loopNestClauseOps.loopSteps);
+ hostEvalCapture(lb, loopNestClauseOps.loopLowerBounds);
+ hostEvalCapture(ub, loopNestClauseOps.loopUpperBounds);
+ hostEvalCapture(st, loopNestClauseOps.loopSteps);
}
loopNestClauseOps.loopInclusive = rewriter.getUnitAttr();
@@ -439,6 +572,243 @@ class DoConcurrentConversion
return loopNestOp;
}
+ void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value liveIn,
+ mlir::Value rawAddr,
+ llvm::SmallVectorImpl<mlir::Value> &boundsOps) const {
+ fir::ExtendedValue extVal =
+ hlfir::translateToExtendedValue(rawAddr.getLoc(), builder,
+ hlfir::Entity{liveIn},
+ /*contiguousHint=*/
+ true)
+ .first;
+ fir::factory::AddrAndBoundsInfo info = fir::factory::getDataOperandBaseAddr(
+ builder, rawAddr, /*isOptional=*/false, rawAddr.getLoc());
+ boundsOps = fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
+ mlir::omp::MapBoundsType>(
+ builder, info, extVal,
+ /*dataExvIsAssumedSize=*/false, rawAddr.getLoc());
+ }
+
+ mlir::omp::MapInfoOp genMapInfoOpForLiveIn(fir::FirOpBuilder &builder,
+ mlir::Value liveIn) const {
+ mlir::Value rawAddr = liveIn;
+ llvm::StringRef name;
+
+ mlir::Operation *liveInDefiningOp = liveIn.getDefiningOp();
+ auto declareOp =
+ mlir::dyn_cast_if_present<hlfir::DeclareOp>(liveInDefiningOp);
+
+ if (declareOp != nullptr) {
+ // Use the raw address to avoid unboxing `fir.box` values whenever
+ // possible. Put differently, if we have access to the direct value memory
+ // reference/address, we use it.
+ rawAddr = declareOp.getOriginalBase();
+ name = declareOp.getUniqName();
+ }
+
+ if (!llvm::isa<mlir::omp::PointerLikeType>(rawAddr.getType())) {
+ builder.setInsertionPointAfter(liveInDefiningOp);
+ auto copyVal = builder.createTemporary(liveIn.getLoc(), liveIn.getType());
+ builder.createStoreWithConvert(copyVal.getLoc(), liveIn, copyVal);
+ rawAddr = copyVal;
+ }
+
+ mlir::Type liveInType = liveIn.getType();
+ mlir::Type eleType = liveInType;
+ if (auto refType = mlir::dyn_cast<fir::ReferenceType>(liveInType))
+ eleType = refType.getElementType();
+
+ llvm::omp::OpenMPOffloadMappingFlags mapFlag =
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
+ mlir::omp::VariableCaptureKind captureKind =
+ mlir::omp::VariableCaptureKind::ByRef;
+
+ if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) {
+ captureKind = mlir::omp::VariableCaptureKind::ByCopy;
+ } else if (!fir::isa_builtin_cptr_type(eleType)) {
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
+ }
+
+ llvm::SmallVector<mlir::Value> boundsOps;
+ genBoundsOps(builder, liveIn, rawAddr, boundsOps);
+
+ return Fortran::utils::openmp::createMapInfoOp(
+ builder, liveIn.getLoc(), rawAddr,
+ /*varPtrPtr=*/{}, name.str(), boundsOps,
+ /*members=*/{},
+ /*membersIndex=*/mlir::ArrayAttr{},
+ static_cast<
+ std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
+ mapFlag),
+ captureKind, rawAddr.getType());
+ }
+
+ mlir::omp::TargetOp
+ genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
+ mlir::IRMapping &mapper, llvm::ArrayRef<mlir::Value> mappedVars,
+ mlir::omp::TargetOperands &clauseOps,
+ mlir::omp::LoopNestOperands &loopNestClauseOps,
+ const LiveInShapeInfoMap &liveInShapeInfoMap) const {
+ auto targetOp = rewriter.create<mlir::omp::TargetOp>(loc, clauseOps);
+ auto argIface = llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*targetOp);
+
+ mlir::Region ®ion = targetOp.getRegion();
+
+ llvm::SmallVector<mlir::Type> regionArgTypes;
+ llvm::SmallVector<mlir::Location> regionArgLocs;
+
+ for (auto var : llvm::concat<const mlir::Value>(clauseOps.hostEvalVars,
+ clauseOps.mapVars)) {
+ regionArgTypes.push_back(var.getType());
+ regionArgLocs.push_back(var.getLoc());
+ }
+
+ rewriter.createBlock(®ion, {}, regionArgTypes, regionArgLocs);
+ fir::FirOpBuilder builder(
+ rewriter,
+ fir::getKindMapping(targetOp->getParentOfType<mlir::ModuleOp>()));
+
+ // Within the loop, it is possible that we discover other values that need
+ // to be mapped to the target region (the shape info values for arrays, for
+ // example). Therefore, the map block args might be extended and resized.
+ // Hence, we invoke `argIface.getMapBlockArgs()` on every iteration to make
+ // sure we access the proper vector of data.
+ int idx = 0;
+ for (auto [mapInfoOp, mappedVar] :
+ llvm::zip_equal(clauseOps.mapVars, mappedVars)) {
+ auto miOp = mlir::cast<mlir::omp::MapInfoOp>(mapInfoOp.getDefiningOp());
+ hlfir::DeclareOp liveInDeclare =
+ genLiveInDeclare(builder, targetOp, argIface.getMapBlockArgs()[idx],
+ miOp, liveInShapeInfoMap.at(mappedVar));
+ ++idx;
+
+ // TODO: If `mappedVar.getDefiningOp()` is a `fir::BoxAddrOp`, we probably
+ // need to "unpack" the box by getting the defining op of its value.
+ // However, we have not hit this case in practice yet, so we leave it as a
+ // TODO for now.
+
+ auto mapHostValueToDevice = [&](mlir::Value hostValue,
+ mlir::Value deviceValue) {
+ if (!llvm::isa<mlir::omp::PointerLikeType>(hostValue.getType()))
+ mapper.map(hostValue,
+ builder.loadIfRef(hostValue.getLoc(), deviceValue));
+ else
+ mapper.map(hostValue, deviceValue);
+ };
+
+ mapHostValueToDevice(mappedVar, liveInDeclare.getOriginalBase());
+
+ if (auto origDeclareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>(
+ mappedVar.getDefiningOp()))
+ mapHostValueToDevice(origDeclareOp.getBase(), liveInDeclare.getBase());
+ }
+
+ for (auto [arg, hostEval] : llvm::zip_equal(argIface.getHostEvalBlockArgs(),
+ clauseOps.hostEvalVars))
+ mapper.map(hostEval, arg);
+
+ for (unsigned i = 0; i < loopNestClauseOps.loopLowerBounds.size(); ++i) {
+ loopNestClauseOps.loopLowerBounds[i] =
+ mapper.lookup(loopNestClauseOps.loopLowerBounds[i]);
+ loopNestClauseOps.loopUpperBounds[i] =
+ mapper.lookup(loopNestClauseOps.loopUpperBounds[i]);
+ loopNestClauseOps.loopSteps[i] =
+ mapper.lookup(loopNestClauseOps.loopSteps[i]);
+ }
+
+ // Check if cloning the bounds introduced any dependency on the outer
+ // region. If so, then either clone them as well if they are
+ // MemoryEffectFree, or else copy them to a new temporary and add them to
+ // the map and block_argument lists and replace their uses with the new
+ // temporary.
+ Fortran::utils::openmp::cloneOrMapRegionOutsiders(builder, targetOp);
+ rewriter.setInsertionPoint(
+ rewriter.create<mlir::omp::TerminatorOp>(targetOp.getLoc()));
+
+ return targetOp;
+ }
+
+ hlfir::DeclareOp genLiveInDeclare(
+ fir::FirOpBuilder &builder, mlir::omp::TargetOp targetOp,
+ mlir::Value liveInArg, mlir::omp::MapInfoOp liveInMapInfoOp,
+ const TargetDeclareShapeCreationInfo &targetShapeCreationInfo) const {
+ mlir::Type liveInType = liveInArg.getType();
+ std::string liveInName = liveInMapInfoOp.getName().has_value()
+ ? liveInMapInfoOp.getName().value().str()
+ : std::string("");
+ if (fir::isa_ref_type(liveInType))
+ liveInType = fir::unwrapRefType(liveInType);
+
+ mlir::Value shape = [&]() -> mlir::Value {
+ if (!targetShapeCreationInfo.isShapedValue())
+ return {};
+
+ llvm::SmallVector<mlir::Value> extentOperands;
+ llvm::SmallVector<mlir::Value> startIndexOperands;
+
+ if (targetShapeCreationInfo.isShapeShiftedValue()) {
+ llvm::SmallVector<mlir::Value> shapeShiftOperands;
+
+ size_t shapeIdx = 0;
+ for (auto [startIndex, extent] :
+ llvm::zip_equal(targetShapeCreationInfo.startIndices,
+ targetShapeCreationInfo.extents)) {
+ shapeShiftOperands.push_back(
+ Fortran::utils::openmp::mapTemporaryValue(
+ builder, targetOp, startIndex,
+ liveInName + ".start_idx.dim" + std::to_string(shapeIdx)));
+ shapeShiftOperands.push_back(
+ Fortran::utils::openmp::mapTemporaryValue(
+ builder, targetOp, extent,
+ liveInName + ".extent.dim" + std::to_string(shapeIdx)));
+ ++shapeIdx;
+ }
+
+ auto shapeShiftType = fir::ShapeShiftType::get(
+ builder.getContext(), shapeShiftOperands.size() / 2);
+ return builder.create<fir::ShapeShiftOp>(
+ liveInArg.getLoc(), shapeShiftType, shapeShiftOperands);
+ }
+
+ llvm::SmallVector<mlir::Value> shapeOperands;
+ size_t shapeIdx = 0;
+ for (auto extent : targetShapeCreationInfo.extents) {
+ shapeOperands.push_back(Fortran::utils::openmp::mapTemporaryValue(
+ builder, targetOp, extent,
+ liveInName + ".extent.dim" + std::to_string(shapeIdx)));
+ ++shapeIdx;
+ }
+
+ return builder.create<fir::ShapeOp>(liveInArg.getLoc(), shapeOperands);
+ }();
+
+ return builder.create<hlfir::DeclareOp>(liveInArg.getLoc(), liveInArg,
+ liveInName, shape);
+ }
+
+ mlir::omp::TeamsOp
+ genTeamsOp(mlir::Location loc,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ auto teamsOp = rewriter.create<mlir::omp::TeamsOp>(
+ loc, /*clauses=*/mlir::omp::TeamsOperands{});
+
+ rewriter.createBlock(&teamsOp.getRegion());
+ rewriter.setInsertionPoint(rewriter.create<mlir::omp::TerminatorOp>(loc));
+
+ return teamsOp;
+ }
+
+ mlir::omp::DistributeOp
+ genDistributeOp(mlir::Location loc,
+ mlir::ConversionPatternRewriter &rewriter) const {
+ auto distOp = rewriter.create<mlir::omp::DistributeOp>(
+ loc, /*clauses=*/mlir::omp::DistributeOperands{});
+
+ rewriter.createBlock(&distOp.getRegion());
+ return...
[truncated]
Ping! Please have a look when you have time.
…ide values (#155754) Following up on #154483, this PR introduces further refactoring to extract some shared utils between OpenMP lowering and the `do concurrent` conversion pass. In particular, it extracts 2 utils that handle mapping or cloning values used inside target regions but defined outside. Later `do concurrent` PR(s) will also use these utils.
PR stack:
- #155754 ◀️
- #155987
- #155992
- #155993
- #156589
- #156610
- #156837
Applies upstream PR llvm#155987 to avoid annoying merge conflicts later on.
…#155987) Upstreams further parts of the `do concurrent` to OpenMP conversion pass from AMD's fork. This PR extends the pass by adding support for mapping to the device.
PR stack:
- llvm/llvm-project#155754
- llvm/llvm-project#155987 ◀️
- llvm/llvm-project#155992
- llvm/llvm-project#155993
- llvm/llvm-project#157638
- llvm/llvm-project#156610
- llvm/llvm-project#156837
… tests (#155992) Adds more lit tests for `do concurrent` device mapping.
PR stack:
- llvm/llvm-project#155754
- llvm/llvm-project#155987
- llvm/llvm-project#155992 ◀️
- llvm/llvm-project#155993
- llvm/llvm-project#157638
- llvm/llvm-project#156610
- llvm/llvm-project#156837
…nMP mapping (#155993) Adds end-to-end tests for `do concurrent` offloading to the device.
PR stack:
- llvm/llvm-project#155754
- llvm/llvm-project#155987
- llvm/llvm-project#155992
- llvm/llvm-project#155993 ◀️
- llvm/llvm-project#157638
- llvm/llvm-project#156610
- llvm/llvm-project#156837
… (#157638) Extends support for mapping `do concurrent` on the device by adding support for `local` specifiers. The changes in this PR map the local variable to the `omp.target` op and use the mapped value as the `private` clause operand in the nested `omp.parallel` op (a sketch follows the PR list below).
- llvm/llvm-project#155754
- llvm/llvm-project#155987
- llvm/llvm-project#155992
- llvm/llvm-project#155993
- llvm/llvm-project#157638 ◀️
- llvm/llvm-project#156610
- llvm/llvm-project#156837
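For illustration, here is a minimal sketch of the kind of loop this enables (a hypothetical example of mine, not taken from the PR's tests, using the standard Fortran 2018 `local` locality specifier):

```fortran
! Hypothetical sketch: `tmp` is a `local` variable, so instead of being
! mapped as a live-in it is mapped to the `omp.target` op and then used as
! the `private` clause operand on the nested `omp.parallel` op.
subroutine scale_shift(a, n)
  implicit none
  integer, intent(in) :: n
  real, intent(inout) :: a(n)
  integer :: i
  real :: tmp

  do concurrent (i = 1:n) local(tmp)
    tmp = a(i) * 2.0
    a(i) = tmp + 1.0
  end do
end subroutine scale_shift
```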
…e (#156610) Extends `do concurrent` to OpenMP device mapping by adding support for mapping `reduce` specifiers to omp `reduction` clauses. The changes attach 2 `reduction` clauses to the mapped OpenMP construct: one on the `teams` part of the construct and one on the `wloop` part (a sketch follows the PR list below).
- llvm/llvm-project#155754
- llvm/llvm-project#155987
- llvm/llvm-project#155992
- llvm/llvm-project#155993
- llvm/llvm-project#157638
- llvm/llvm-project#156610 ◀️
- llvm/llvm-project#156837
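A similar hedged sketch for the `reduce` case (again a hypothetical example, not from the PR's tests, using the Fortran 2023 `reduce` locality specifier):

```fortran
! Hypothetical sketch: the `reduce(+:s)` specifier maps to two OpenMP
! `reduction(+:s)` clauses, one on the `teams` construct and one on the
! worksharing loop.
function array_sum(a, n) result(s)
  implicit none
  integer, intent(in) :: n
  real, intent(in) :: a(n)
  real :: s
  integer :: i

  s = 0.0
  do concurrent (i = 1:n) reduce(+:s)
    s = s + a(i)
  end do
end function array_sum
```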
…ions on the GPU (#156837) Fixes a bug related to insertion points when inlining multi-block combiner reduction regions. The IP at the end of the inlined region was not used, resulting in BBs with multiple terminators being emitted.
PR stack:
- llvm/llvm-project#155754
- llvm/llvm-project#155987
- llvm/llvm-project#155992
- llvm/llvm-project#155993
- llvm/llvm-project#157638
- llvm/llvm-project#156610
- llvm/llvm-project#156837 ◀️
Upstreams further parts of the `do concurrent` to OpenMP conversion pass from AMD's fork. This PR extends the pass by adding support for mapping to the device. (An illustrative sketch follows the PR stack below.)

PR stack:
- `do concurrent` mapping to device #155987
- `do concurrent` to device mapping lit tests #155992
- `do concurrent`: support `local` on device #157638
- `do concurrent`: support `reduce` on device #156610
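To make the feature concrete, here is a minimal sketch (a hypothetical example, not from the PR's tests; the `-fdo-concurrent-to-openmp=device` flag name is taken from flang's existing `do concurrent` mapping support and is an assumption on my part). The loop below becomes an `omp.target` + `omp.teams` + `omp.distribute` + composite `omp.parallel`/`omp.wsloop` nest, with `a` and `b` collected as live-ins that get `omp.map.info` entries and the loop bounds passed as `host_eval` values:

```fortran
! Hypothetical sketch; compile with something like:
!   flang -fopenmp -fdo-concurrent-to-openmp=device example.f90
program example
  implicit none
  integer, parameter :: n = 1024
  integer :: i
  real :: a(n), b(n)

  a = 1.0
  b = 2.0

  ! `a` and `b` are live-ins: used inside the loop but defined outside it,
  ! so the pass generates `omp.map.info` ops for them. The bounds become
  ! `host_eval` values on the `omp.target` op.
  do concurrent (i = 1:n)
    a(i) = a(i) + b(i)
  end do

  print *, a(1), a(n)
end program example
```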