Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions flang/lib/Optimizer/Transforms/CUDA/CUFOpConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
Expand All @@ -49,9 +50,9 @@ namespace {
static bool inDeviceContext(mlir::Operation *op) {
if (op->getParentOfType<cuf::KernelOp>())
return true;
if (auto funcOp = op->getParentOfType<mlir::gpu::GPUFuncOp>())
if (op->getParentOfType<mlir::acc::OffloadRegionOpInterface>())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The diff looks a bit odd, so to explain what I did:

  • I added a new check for acc::OffloadRegionOpInterface, which covers both acc regions and gpu.launch.
  • I removed the separate gpu.launch check, since that case is already covered by the interface check.

return true;
if (auto funcOp = op->getParentOfType<mlir::gpu::LaunchOp>())
if (auto funcOp = op->getParentOfType<mlir::gpu::GPUFuncOp>())
return true;
if (auto funcOp = op->getParentOfType<mlir::func::FuncOp>()) {
if (auto cudaProcAttr =
Expand Down Expand Up @@ -128,6 +129,9 @@ struct DeclareOpConversion : public mlir::OpRewritePattern<fir::DeclareOp> {
if (op.getResult().getUsers().empty())
return success();
if (auto addrOfOp = op.getMemref().getDefiningOp<fir::AddrOfOp>()) {
if (inDeviceContext(addrOfOp)) {
return failure();
}
if (auto global = symTab.lookup<fir::GlobalOp>(
addrOfOp.getSymbol().getRootReference().getValue())) {
if (cuf::isRegisteredDeviceGlobal(global)) {
Expand Down
27 changes: 27 additions & 0 deletions flang/test/Fir/CUDA/cuda-global-addr.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,33 @@ func.func @_QQmain() attributes {fir.bindc_name = "test"} {

// -----

// Check that we do not introduce a call to _FortranACUFGetDeviceAddress when
// the address_of is inside an acc.parallel region (OffloadRegionOpInterface).

module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} {
// Zero-initialized global of type !fir.array<10xi32> carrying the
// #cuf.cuda<device> data attribute; it is the symbol whose address is taken
// inside the acc.parallel region of @_QQmain_acc.
fir.global @_QMmod1Eadev_acc {data_attr = #cuf.cuda<device>} : !fir.array<10xi32> {
%0 = fir.zero_bits !fir.array<10xi32>
fir.has_value %0 : !fir.array<10xi32>
}
// Function whose only work happens inside an acc.parallel region: it takes the
// address of the device global and declares it there, instead of doing so
// directly in the function body.
func.func @_QQmain_acc() attributes {fir.bindc_name = "test_acc"} {
acc.parallel {
%c10 = arith.constant 10 : index
%1 = fir.shape %c10 : (index) -> !fir.shape<1>
// Both the address_of and the declare that consumes it are nested in the
// offload region; this is the pattern the test expects the conversion to
// leave without a device-address runtime call.
%3 = fir.address_of(@_QMmod1Eadev_acc) : !fir.ref<!fir.array<10xi32>>
%4 = fir.declare %3(%1) {data_attr = #cuf.cuda<device>, uniq_name = "_QMmod1Eadev_acc"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.ref<!fir.array<10xi32>>
acc.yield
}
return
}

// CHECK-LABEL: func.func @_QQmain_acc()
// CHECK: acc.parallel
// CHECK-NOT: fir.call {{.*}}GetDeviceAddress

}

// -----

// Check that we do not introduce a call to _FortranACUFGetDeviceAddress when
// the value has no user.

Expand Down