diff --git a/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h b/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h new file mode 100644 index 000000000000..2525f6abd850 --- /dev/null +++ b/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h @@ -0,0 +1,28 @@ +//===- GPUToROCDLPass.h - Convert GPU kernel to ROCDL dialect ----*- C++ -*-===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +#ifndef MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_ +#define MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_ + +namespace mlir { +struct FunctionPassBase; + +/// Creates a pass that lowers GPU dialect operations to ROCDL counterparts. 
+FunctionPassBase *createLowerGpuOpsToROCDLOpsPass(); + +} // namespace mlir + +#endif // MLIR_CONVERSION_GPUTOROCDL_GPUTOROCDLPASS_H_ diff --git a/include/mlir/LLVMIR/CMakeLists.txt b/include/mlir/LLVMIR/CMakeLists.txt index 1d7d06bc25c1..3e5a0346ed60 100644 --- a/include/mlir/LLVMIR/CMakeLists.txt +++ b/include/mlir/LLVMIR/CMakeLists.txt @@ -8,9 +8,16 @@ set(LLVM_TARGET_DEFINITIONS NVVMOps.td) mlir_tablegen(NVVMOps.h.inc -gen-op-decls) mlir_tablegen(NVVMOps.cpp.inc -gen-op-defs) add_public_tablegen_target(MLIRNVVMOpsIncGen) +set(LLVM_TARGET_DEFINITIONS ROCDLOps.td) +mlir_tablegen(ROCDLOps.h.inc -gen-op-decls) +mlir_tablegen(ROCDLOps.cpp.inc -gen-op-defs) +add_public_tablegen_target(MLIRROCDLOpsIncGen) set(LLVM_TARGET_DEFINITIONS LLVMOps.td) mlir_tablegen(LLVMConversions.inc -gen-llvmir-conversions) add_public_tablegen_target(MLIRLLVMConversionsIncGen) set(LLVM_TARGET_DEFINITIONS NVVMOps.td) mlir_tablegen(NVVMConversions.inc -gen-llvmir-conversions) add_public_tablegen_target(MLIRNVVMConversionsIncGen) +set(LLVM_TARGET_DEFINITIONS ROCDLOps.td) +mlir_tablegen(ROCDLConversions.inc -gen-llvmir-conversions) +add_public_tablegen_target(MLIRROCDLConversionsIncGen) diff --git a/include/mlir/LLVMIR/ROCDLDialect.h b/include/mlir/LLVMIR/ROCDLDialect.h new file mode 100644 index 000000000000..499f88f1c27e --- /dev/null +++ b/include/mlir/LLVMIR/ROCDLDialect.h @@ -0,0 +1,43 @@ +//===- ROCDLDialect.h - MLIR ROCDL IR dialect ---------------------*- C++ -*-===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This file defines the ROCDL IR dialect in MLIR, containing ROCDL operations and +// ROCDL specific extensions to the LLVM type system. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_LLVMIR_ROCDLDIALECT_H_ +#define MLIR_LLVMIR_ROCDLDIALECT_H_ + +#include "mlir/IR/Dialect.h" +#include "mlir/IR/OpDefinition.h" +namespace mlir { +namespace ROCDL { + +///// Ops ///// +#define GET_OP_CLASSES +#include "mlir/LLVMIR/ROCDLOps.h.inc" + +class ROCDLDialect : public Dialect { +public: + explicit ROCDLDialect(MLIRContext *context); +}; + +} // namespace ROCDL +} // namespace mlir + +#endif /* MLIR_LLVMIR_ROCDLDIALECT_H_ */ diff --git a/include/mlir/LLVMIR/ROCDLOps.td b/include/mlir/LLVMIR/ROCDLOps.td new file mode 100644 index 000000000000..a49f00e1b94b --- /dev/null +++ b/include/mlir/LLVMIR/ROCDLOps.td @@ -0,0 +1,70 @@ +//===-- ROCDLOps.td - ROCDL IR dialect op definition file ----*- tablegen -*-===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= +// +// This is the ROCDL IR operation definition file. 
+// +//===----------------------------------------------------------------------===// + +#ifdef ROCDLIR_OPS +#else +#define ROCDLIR_OPS + +include "mlir/LLVMIR/LLVMOpBase.td" + +def ROCDL_Dialect : Dialect { + let name = "rocdl"; + let cppNamespace = "ROCDL"; +} + +class ROCDL_Op traits = []> : + LLVM_OpBase { +} + +class ROCDL_SpecialRegisterOp traits = []> : + ROCDL_Op, + Results<(outs LLVM_Type:$res)>, Arguments<(ins)> { + string llvmBuilder = "$res = createIntrinsicCall(builder," + # "llvm::Intrinsic::amdgcn_" # !subst(".","_", mnemonic) # ");"; + let parser = [{ return parseROCDLOp(parser, result); }]; + let printer = [{ printROCDLOp(p, this->getOperation()); }]; +} + +def ROCDL_ThreadIdXOp : ROCDL_SpecialRegisterOp<"workitem.id.x">; +def ROCDL_ThreadIdYOp : ROCDL_SpecialRegisterOp<"workitem.id.y">; +def ROCDL_ThreadIdZOp : ROCDL_SpecialRegisterOp<"workitem.id.z">; +def ROCDL_BlockIdXOp : ROCDL_SpecialRegisterOp<"workgroup.id.x">; +def ROCDL_BlockIdYOp : ROCDL_SpecialRegisterOp<"workgroup.id.y">; +def ROCDL_BlockIdZOp : ROCDL_SpecialRegisterOp<"workgroup.id.z">; + +class ROCDL_DeviceFunctionOp traits = []> : + ROCDL_Op, + Results<(outs LLVM_Type:$res)>, Arguments<(ins)> { + string llvmBuilder = "$res = createDeviceFunctionCall(builder, \""# device_function # "\", " # parameter # ");"; + let parser = [{ return parseROCDLOp(parser, result); }]; + let printer = [{ printROCDLOp(p, this->getOperation()); }]; +} + +def ROCDL_BlockDimXOp : ROCDL_DeviceFunctionOp<"workgroup.dim.x", "__ockl_get_local_size", 0>; +def ROCDL_BlockDimYOp : ROCDL_DeviceFunctionOp<"workgroup.dim.y", "__ockl_get_local_size", 1>; +def ROCDL_BlockDimZOp : ROCDL_DeviceFunctionOp<"workgroup.dim.z", "__ockl_get_local_size", 2>; +def ROCDL_GridDimXOp : ROCDL_DeviceFunctionOp<"grid.dim.x", "__ockl_get_global_size", 0>; +def ROCDL_GridDimYOp : ROCDL_DeviceFunctionOp<"grid.dim.y", "__ockl_get_global_size", 1>; +def ROCDL_GridDimZOp : ROCDL_DeviceFunctionOp<"grid.dim.z", "__ockl_get_global_size", 2>; + 
+#endif // ROCDLIR_OPS diff --git a/include/mlir/Target/ROCDLIR.h b/include/mlir/Target/ROCDLIR.h new file mode 100644 index 000000000000..a7be2ebefc19 --- /dev/null +++ b/include/mlir/Target/ROCDLIR.h @@ -0,0 +1,44 @@
+//===- ROCDLIR.h - MLIR to LLVM + ROCDL IR conversion -------------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file declares the entry point for the MLIR to LLVM + ROCDL IR conversion.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TARGET_ROCDLIR_H
+#define MLIR_TARGET_ROCDLIR_H
+
+#include <memory>
+
+// Forward-declare LLVM classes.
+namespace llvm {
+class Module;
+} // namespace llvm
+
+namespace mlir {
+class ModuleOp;
+
+/// Convert the given MLIR module into ROCDL IR. This conversion requires the
+/// registration of the LLVM IR dialect and will extract the LLVM context
+/// from the registered LLVM IR dialect. In case of error, report it
+/// to the error handler registered with the MLIR context, if any (obtained from
+/// the MLIR module), and return `nullptr`.
+std::unique_ptr<llvm::Module> translateModuleToROCDLIR(ModuleOp m);
+
+} // namespace mlir
+
+#endif // MLIR_TARGET_ROCDLIR_H
diff --git a/lib/Conversion/CMakeLists.txt b/lib/Conversion/CMakeLists.txt index 1ddd103f28e8..c715c8ff4a1e 100644 --- a/lib/Conversion/CMakeLists.txt +++ b/lib/Conversion/CMakeLists.txt @@ -2,6 +2,7 @@ add_subdirectory(LoopsToGPU) add_subdirectory(ControlFlowToCFG) add_subdirectory(GPUToCUDA) add_subdirectory(GPUToNVVM) +add_subdirectory(GPUToROCDL) add_subdirectory(GPUToSPIRV) add_subdirectory(StandardToLLVM) add_subdirectory(StandardToSPIRV) diff --git a/lib/Conversion/GPUToROCDL/CMakeLists.txt b/lib/Conversion/GPUToROCDL/CMakeLists.txt new file mode 100644 index 000000000000..3c97e5ca86ba --- /dev/null +++ b/lib/Conversion/GPUToROCDL/CMakeLists.txt @@ -0,0 +1,10 @@ +add_llvm_library(MLIRGPUtoROCDLTransforms + LowerGpuOpsToROCDLOps.cpp + ) +target_link_libraries(MLIRGPUtoROCDLTransforms + LLVMSupport + MLIRGPU + MLIRLLVMIR + MLIRROCDLIR + MLIRPass + ) diff --git a/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp new file mode 100644 index 000000000000..83ac0939ccf8 --- /dev/null +++ b/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -0,0 +1,142 @@ +//===- LowerGpuOpsToROCDLOps.cpp - MLIR GPU to ROCDL lowering passes --------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// =============================================================================
+//
+// This file implements a pass to generate ROCDLIR operations for higher-level
+// GPU operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/GPU/GPUDialect.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/StandardTypes.h"
+#include "mlir/LLVMIR/LLVMDialect.h"
+#include "mlir/LLVMIR/ROCDLDialect.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassRegistry.h"
+
+#include "llvm/ADT/StringSwitch.h"
+
+namespace mlir {
+namespace {
+
+// A pass that replaces all occurrences of GPU operations with their
+// corresponding ROCDL equivalent.
+//
+// This pass does not handle launching of kernels. Instead, it is meant to be
+// used on the body region of a launch or the body region of a kernel
+// function.
+class LowerGpuOpsToROCDLOpsPass : public FunctionPass<LowerGpuOpsToROCDLOpsPass> {
+private:
+  enum dimension { X = 0, Y = 1, Z = 2, invalid };
+
+  template <typename T> dimension dimensionToIndex(T op) {
+    return llvm::StringSwitch<dimension>(op.dimension())
+        .Case("x", X)
+        .Case("y", Y)
+        .Case("z", Z)
+        .Default(invalid);
+  }
+
+  // Helper that replaces Op with XOp, YOp, or ZOp depending on the dimension
+  // that Op operates on. Op is assumed to return an `std.index` value and
+  // XOp, YOp and ZOp are assumed to return an `llvm.i32` value. Depending on
+  // `indexBitwidth`, sign-extend or truncate the resulting value to match the
+  // bitwidth expected by the consumers of the value.
+  template <typename XOp, typename YOp, typename ZOp, class Op>
+  void replaceWithIntrinsicOrDeviceFunction(Op operation, LLVM::LLVMDialect *dialect,
+                                            unsigned indexBitwidth) {
+    assert(operation.getType().isIndex() &&
+           "expected an operation returning index");
+    OpBuilder builder(operation);
+    auto loc = operation.getLoc();
+    Value *newOp;
+    switch (dimensionToIndex(operation)) {
+    case X:
+      newOp = builder.create<XOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
+      break;
+    case Y:
+      newOp = builder.create<YOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
+      break;
+    case Z:
+      newOp = builder.create<ZOp>(loc, LLVM::LLVMType::getInt32Ty(dialect));
+      break;
+    default:
+      operation.emitError("Illegal dimension: " + operation.dimension());
+      signalPassFailure();
+      return;
+    }
+
+    if (indexBitwidth > 32) {
+      newOp = builder.create<LLVM::SExtOp>(
+          loc, LLVM::LLVMType::getIntNTy(dialect, indexBitwidth), newOp);
+    } else if (indexBitwidth < 32) {
+      newOp = builder.create<LLVM::TruncOp>(
+          loc, LLVM::LLVMType::getIntNTy(dialect, indexBitwidth), newOp);
+    }
+    operation.replaceAllUsesWith(newOp);
+    operation.erase();
+  }
+
+public:
+  void runOnFunction() override {
+    LLVM::LLVMDialect *llvmDialect =
+        getContext().getRegisteredDialect<LLVM::LLVMDialect>();
+    unsigned indexBitwidth =
+        llvmDialect->getLLVMModule().getDataLayout().getPointerSizeInBits();
+    getFunction().walk([&](Operation *opInst) {
+      if (auto threadId = dyn_cast<gpu::ThreadId>(opInst)) {
+        replaceWithIntrinsicOrDeviceFunction<
+            ROCDL::ThreadIdXOp, ROCDL::ThreadIdYOp, ROCDL::ThreadIdZOp>(
+            threadId, llvmDialect, indexBitwidth);
+        return;
+      }
+      if (auto blockId = dyn_cast<gpu::BlockId>(opInst)) {
+        replaceWithIntrinsicOrDeviceFunction<
+            ROCDL::BlockIdXOp, ROCDL::BlockIdYOp, ROCDL::BlockIdZOp>(
+            blockId, llvmDialect, indexBitwidth);
+        return;
+      }
+
+      // BlockDimX/Y/Z are implemented as device functions on ROCDL.
+      if (auto blockDim = dyn_cast<gpu::BlockDim>(opInst)) {
+        replaceWithIntrinsicOrDeviceFunction<
+            ROCDL::BlockDimXOp, ROCDL::BlockDimYOp, ROCDL::BlockDimZOp>(
+            blockDim, llvmDialect, indexBitwidth);
+        return;
+      }
+      // GridDimX/Y/Z are implemented as device functions on ROCDL.
+      if (auto gridDim = dyn_cast<gpu::GridDim>(opInst)) {
+        replaceWithIntrinsicOrDeviceFunction<
+            ROCDL::GridDimXOp, ROCDL::GridDimYOp, ROCDL::GridDimZOp>(
+            gridDim, llvmDialect, indexBitwidth);
+        return;
+      }
+    });
+  }
+};
+
+} // anonymous namespace
+
+FunctionPassBase *createLowerGpuOpsToROCDLOpsPass() {
+  return new LowerGpuOpsToROCDLOpsPass();
+}
+
+static PassRegistration<LowerGpuOpsToROCDLOpsPass>
+    pass("lower-gpu-ops-to-rocdl-ops",
+         "Generate ROCDL operations for gpu operations");
+
+} // namespace mlir
diff --git a/lib/LLVMIR/CMakeLists.txt b/lib/LLVMIR/CMakeLists.txt index 5e21850dbac5..546bfda69dcf 100644 --- a/lib/LLVMIR/CMakeLists.txt +++ b/lib/LLVMIR/CMakeLists.txt @@ -15,3 +15,12 @@ add_llvm_library(MLIRNVVMIR ) add_dependencies(MLIRNVVMIR MLIRNVVMOpsIncGen MLIRNVVMConversionsIncGen LLVMAsmParser LLVMCore LLVMSupport) target_link_libraries(MLIRNVVMIR LLVMAsmParser LLVMCore LLVMSupport) + +add_llvm_library(MLIRROCDLIR + IR/ROCDLDialect.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/LLVMIR + ) +add_dependencies(MLIRROCDLIR MLIRROCDLOpsIncGen MLIRROCDLConversionsIncGen LLVMAsmParser LLVMCore LLVMSupport) +target_link_libraries(MLIRROCDLIR LLVMAsmParser LLVMCore LLVMSupport) diff --git a/lib/LLVMIR/IR/ROCDLDialect.cpp b/lib/LLVMIR/IR/ROCDLDialect.cpp new file mode 100644 index 000000000000..80f0001c1fff --- /dev/null +++ b/lib/LLVMIR/IR/ROCDLDialect.cpp @@ -0,0 +1,86 @@ +//===- ROCDLDialect.cpp - ROCDL IR Ops and Dialect registration -------------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file defines the types and operation details for the ROCDL IR dialect in
+// MLIR, and the LLVM IR dialect. It also registers the dialect.
+//
+// The ROCDL dialect only contains GPU specific additions on top of the general
+// LLVM dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/LLVMIR/ROCDLDialect.h"
+
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/StandardTypes.h"
+#include "mlir/LLVMIR/LLVMDialect.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/SourceMgr.h"
+
+namespace mlir {
+namespace ROCDL {
+
+//===----------------------------------------------------------------------===//
+// Printing/parsing for ROCDL ops
+//===----------------------------------------------------------------------===//
+
+static void printROCDLOp(OpAsmPrinter *p, Operation *op) {
+  *p << op->getName() << " : ";
+  if (op->getNumResults() != 1)
+    *p << "###invalid type###";
+  else
+    *p << op->getResult(0)->getType();
+}
+
+// <operation> ::= `rocdl.XYZ` attribute-dict? `:` type
+static ParseResult parseROCDLOp(OpAsmParser *parser, OperationState *result) {
+  Type resultType;
+  if (parser->parseOptionalAttributeDict(result->attributes))
+    return failure();
+  if (parser->parseColonType(resultType))
+    return failure();
+  result->addTypes(resultType);
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// ROCDLDialect initialization, type parsing, and registration.
+//===----------------------------------------------------------------------===//
+
+ROCDLDialect::ROCDLDialect(MLIRContext *context) : Dialect("rocdl", context) {
+  addOperations<
+#define GET_OP_LIST
+#include "mlir/LLVMIR/ROCDLOps.cpp.inc"
+      >();
+
+  // Support unknown operations because not all ROCDL operations are registered.
+  allowUnknownOperations();
+}
+
+#define GET_OP_CLASSES
+#include "mlir/LLVMIR/ROCDLOps.cpp.inc"
+
+static DialectRegistration<ROCDLDialect> rocdlDialect;
+
+} // namespace ROCDL
+} // namespace mlir
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 9f49b813336b..111e2f673137 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -28,3 +28,17 @@ target_link_libraries(MLIRTargetNVVMIR MLIRNVVMIR MLIRTargetLLVMIRModuleTranslation ) +add_llvm_library(MLIRTargetROCDLIR + LLVMIR/ConvertToROCDLIR.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR + DEPENDS + intrinsics_gen + ) +target_link_libraries(MLIRTargetROCDLIR + MLIRGPU + MLIRIR + MLIRROCDLIR + MLIRTargetLLVMIRModuleTranslation + ) diff --git a/lib/Target/LLVMIR/ConvertToROCDLIR.cpp b/lib/Target/LLVMIR/ConvertToROCDLIR.cpp new file mode 100644 index 000000000000..568dc19cd3d0 --- /dev/null +++ b/lib/Target/LLVMIR/ConvertToROCDLIR.cpp @@ -0,0 +1,117 @@ +//===- ConvertToROCDLIR.cpp - MLIR to LLVM IR conversion -------------------===// +// +// Copyright 2019 The MLIR Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// =============================================================================
+//
+// This file implements a translation between the MLIR LLVM + ROCDL dialects and
+// LLVM IR with ROCDL intrinsics and metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Target/ROCDLIR.h"
+
+#include "mlir/GPU/GPUDialect.h"
+#include "mlir/IR/Function.h"
+#include "mlir/IR/Module.h"
+#include "mlir/LLVMIR/ROCDLDialect.h"
+#include "mlir/Support/FileUtilities.h"
+#include "mlir/Target/LLVMIR/ModuleTranslation.h"
+#include "mlir/Translation.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+using namespace mlir;
+
+namespace {
+static llvm::Value *createIntrinsicCall(llvm::IRBuilder<> &builder,
+                                        llvm::Intrinsic::ID intrinsic) {
+  llvm::Module *module = builder.GetInsertBlock()->getModule();
+  llvm::Function *fn = llvm::Intrinsic::getDeclaration(module, intrinsic, {});
+  return builder.CreateCall(fn);
+}
+
+// Emits a call to the i32(i32) function `fn_name`. ROCM TODO: review interface
+static llvm::Value *createDeviceFunctionCall(llvm::IRBuilder<> &builder,
+                                             StringRef fn_name, int parameter) {
+  llvm::Module *module = builder.GetInsertBlock()->getModule();
+  llvm::FunctionType *fn_type = llvm::FunctionType::get(
+      llvm::Type::getInt32Ty(module->getContext()), // return type.
+      llvm::Type::getInt32Ty(module->getContext()), // parameter type.
+      false);                                       // no variadic arguments.
+  llvm::FunctionCallee fn = module->getOrInsertFunction(fn_name, fn_type);
+  // A named local keeps the one-element operand list alive across CreateCall.
+  llvm::Value *operand = llvm::ConstantInt::get(
+      llvm::Type::getInt32Ty(module->getContext()), parameter);
+  return builder.CreateCall(fn, operand);
+}
+
+// Converts ROCDL ops through the generated table, else defers to the base.
+class ModuleTranslation : public LLVM::ModuleTranslation {
+public:
+  explicit ModuleTranslation(ModuleOp module)
+      : LLVM::ModuleTranslation(module) {}
+  ~ModuleTranslation() override {}
+
+protected:
+  bool convertOperation(Operation &opInst,
+                        llvm::IRBuilder<> &builder) override {
+
+#include "mlir/LLVMIR/ROCDLConversions.inc"
+
+    return LLVM::ModuleTranslation::convertOperation(opInst, builder);
+  }
+};
+} // namespace
+
+std::unique_ptr<llvm::Module> mlir::translateModuleToROCDLIR(ModuleOp m) {
+  auto llvmModule =
+      LLVM::ModuleTranslation::translateModule<ModuleTranslation>(m);
+  if (!llvmModule)
+    return nullptr;
+
+  // Tag GPU kernels with the AMDGPU_KERNEL calling convention and the
+  // "amdgpu-flat-work-group-size" = "1, 1024" function attribute.
+  for (FuncOp func : m.getOps<FuncOp>()) {
+    if (!func.getAttrOfType<UnitAttr>(gpu::GPUDialect::getKernelFuncAttrName()))
+      continue;
+
+    auto *llvmFunc = llvmModule->getFunction(func.getName());
+    llvmFunc->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+    llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1, 1024");
+  }
+
+  return llvmModule;
+}
+
+static TranslateFromMLIRRegistration
+    registration("mlir-to-rocdlir",
+                 [](ModuleOp module, llvm::StringRef outputFilename) {
+                   if (!module)
+                     return failure();
+
+                   auto llvmModule = mlir::translateModuleToROCDLIR(module);
+                   if (!llvmModule)
+                     return failure();
+
+                   auto file = openOutputFile(outputFilename);
+                   if (!file)
+                     return failure();
+
+                   llvmModule->print(file->os(), nullptr);
+                   file->keep();
+                   return success();
+                 });
diff --git a/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir new file mode 100644 index 000000000000..4b5177e3f733 --- /dev/null +++ b/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -0,0 +1,35 @@ +// RUN: mlir-opt %s 
-lower-gpu-ops-to-rocdl-ops | FileCheck %s + +// CHECK-LABEL: func @gpu_index_ops() +func @gpu_index_ops() + attributes { gpu.kernel } { + // CHECK: = rocdl.workitem.id.x : !llvm.i32 + %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index) + // CHECK: = rocdl.workitem.id.y : !llvm.i32 + %tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index) + // CHECK: = rocdl.workitem.id.z : !llvm.i32 + %tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index) + + // CHECK: = rocdl.workgroup.id.x : !llvm.i32 + %bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index) + // CHECK: = rocdl.workgroup.id.y : !llvm.i32 + %bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index) + // CHECK: = rocdl.workgroup.id.z : !llvm.i32 + %bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index) + + // CHECK: = rocdl.workgroup.dim.x : !llvm.i32 + %bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index) + // CHECK: = rocdl.workgroup.dim.y : !llvm.i32 + %bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index) + // CHECK: = rocdl.workgroup.dim.z : !llvm.i32 + %bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index) + + // CHECK: = rocdl.grid.dim.x : !llvm.i32 + %gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index) + // CHECK: = rocdl.grid.dim.y : !llvm.i32 + %gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index) + // CHECK: = rocdl.grid.dim.z : !llvm.i32 + %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index) + + std.return +} diff --git a/test/LLVMIR/rocdl.mlir b/test/LLVMIR/rocdl.mlir new file mode 100644 index 000000000000..82f82ea99f1a --- /dev/null +++ b/test/LLVMIR/rocdl.mlir @@ -0,0 +1,29 @@ +// RUN: mlir-opt %s | FileCheck %s + +func @rocdl_special_regs() -> !llvm.i32 { + // CHECK: %0 = rocdl.workitem.id.x : !llvm.i32 + %0 = rocdl.workitem.id.x : !llvm.i32 + // CHECK: %1 = rocdl.workitem.id.y : !llvm.i32 + %1 = rocdl.workitem.id.y : !llvm.i32 + // CHECK: %2 = rocdl.workitem.id.z : !llvm.i32 + %2 = rocdl.workitem.id.z : !llvm.i32 + // CHECK: %3 = 
rocdl.workgroup.id.x : !llvm.i32 + %3 = rocdl.workgroup.id.x : !llvm.i32 + // CHECK: %4 = rocdl.workgroup.id.y : !llvm.i32 + %4 = rocdl.workgroup.id.y : !llvm.i32 + // CHECK: %5 = rocdl.workgroup.id.z : !llvm.i32 + %5 = rocdl.workgroup.id.z : !llvm.i32 + // CHECK: %6 = rocdl.workgroup.dim.x : !llvm.i32 + %6 = rocdl.workgroup.dim.x : !llvm.i32 + // CHECK: %7 = rocdl.workgroup.dim.y : !llvm.i32 + %7 = rocdl.workgroup.dim.y : !llvm.i32 + // CHECK: %8 = rocdl.workgroup.dim.z : !llvm.i32 + %8 = rocdl.workgroup.dim.z : !llvm.i32 + // CHECK: %9 = rocdl.grid.dim.x : !llvm.i32 + %9 = rocdl.grid.dim.x : !llvm.i32 + // CHECK: %10 = rocdl.grid.dim.y : !llvm.i32 + %10 = rocdl.grid.dim.y : !llvm.i32 + // CHECK: %11 = rocdl.grid.dim.z : !llvm.i32 + %11 = rocdl.grid.dim.z : !llvm.i32 + llvm.return %0 : !llvm.i32 +} diff --git a/test/Target/rocdlir.mlir b/test/Target/rocdlir.mlir new file mode 100644 index 000000000000..65b97d0aa689 --- /dev/null +++ b/test/Target/rocdlir.mlir @@ -0,0 +1,35 @@ +// RUN: mlir-translate -mlir-to-rocdlir %s | FileCheck %s + +func @rocdl_special_regs() -> !llvm.i32 { + // CHECK: %1 = call i32 @llvm.amdgcn.workitem.id.x() + %1 = rocdl.workitem.id.x : !llvm.i32 + // CHECK: %2 = call i32 @llvm.amdgcn.workitem.id.y() + %2 = rocdl.workitem.id.y : !llvm.i32 + // CHECK: %3 = call i32 @llvm.amdgcn.workitem.id.z() + %3 = rocdl.workitem.id.z : !llvm.i32 + // CHECK: %4 = call i32 @llvm.amdgcn.workgroup.id.x() + %4 = rocdl.workgroup.id.x : !llvm.i32 + // CHECK: %5 = call i32 @llvm.amdgcn.workgroup.id.y() + %5 = rocdl.workgroup.id.y : !llvm.i32 + // CHECK: %6 = call i32 @llvm.amdgcn.workgroup.id.z() + %6 = rocdl.workgroup.id.z : !llvm.i32 + // CHECK: %7 = call i32 @__ockl_get_local_size(i32 0) + %7 = rocdl.workgroup.dim.x : !llvm.i32 + // CHECK: %8 = call i32 @__ockl_get_local_size(i32 1) + %8 = rocdl.workgroup.dim.y : !llvm.i32 + // CHECK: %9 = call i32 @__ockl_get_local_size(i32 2) + %9 = rocdl.workgroup.dim.z : !llvm.i32 + // CHECK: %10 = call i32 
@__ockl_get_global_size(i32 0) + %10 = rocdl.grid.dim.x : !llvm.i32 + // CHECK: %11 = call i32 @__ockl_get_global_size(i32 1) + %11 = rocdl.grid.dim.y : !llvm.i32 + // CHECK: %12 = call i32 @__ockl_get_global_size(i32 2) + %12 = rocdl.grid.dim.z : !llvm.i32 + llvm.return %1 : !llvm.i32 +} + +// This function has the "amdgpu_kernel" calling convention after conversion. +// CHECK: amdgpu_kernel +func @kernel_func() attributes {gpu.kernel} { + llvm.return +} diff --git a/tools/mlir-opt/CMakeLists.txt b/tools/mlir-opt/CMakeLists.txt index 26f8885a2428..7eedd8bca069 100644 --- a/tools/mlir-opt/CMakeLists.txt +++ b/tools/mlir-opt/CMakeLists.txt @@ -24,6 +24,7 @@ set(LIBS MLIRFxpMathOps MLIRGPU MLIRGPUtoNVVMTransforms + MLIRGPUtoROCDLTransforms MLIRGPUtoSPIRVTransforms MLIRLinalg MLIRLLVMIR @@ -34,6 +35,7 @@ set(LIBS MLIRPass MLIRQuantizerTransforms MLIRQuantOps + MLIRROCDLIR MLIRSPIRV MLIRSPIRVConversion MLIRStandardOps diff --git a/tools/mlir-translate/CMakeLists.txt b/tools/mlir-translate/CMakeLists.txt index 50df9de8cae7..8f03de449579 100644 --- a/tools/mlir-translate/CMakeLists.txt +++ b/tools/mlir-translate/CMakeLists.txt @@ -9,6 +9,7 @@ set(LIBS MLIRStandardOps MLIRTargetLLVMIR MLIRTargetNVVMIR + MLIRTargetROCDLIR MLIRTransforms MLIRTranslation MLIRSupport