diff --git a/mlir/include/mlir-c/Dialect/GPU.h b/mlir/include/mlir-c/Dialect/GPU.h index c42ff61f9592c..321c1122c3370 100644 --- a/mlir/include/mlir-c/Dialect/GPU.h +++ b/mlir/include/mlir-c/Dialect/GPU.h @@ -37,6 +37,11 @@ MLIR_CAPI_EXPORTED MlirAttribute mlirGPUObjectAttrGet(MlirContext mlirCtx, MlirAttribute target, uint32_t format, MlirStringRef objectStrRef, MlirAttribute mlirObjectProps); +MLIR_CAPI_EXPORTED MlirAttribute mlirGPUObjectAttrGetWithKernels( + MlirContext mlirCtx, MlirAttribute target, uint32_t format, + MlirStringRef objectStrRef, MlirAttribute mlirObjectProps, + MlirAttribute mlirKernelsAttr); + MLIR_CAPI_EXPORTED MlirAttribute mlirGPUObjectAttrGetTarget(MlirAttribute mlirObjectAttr); @@ -52,6 +57,12 @@ mlirGPUObjectAttrHasProperties(MlirAttribute mlirObjectAttr); MLIR_CAPI_EXPORTED MlirAttribute mlirGPUObjectAttrGetProperties(MlirAttribute mlirObjectAttr); +MLIR_CAPI_EXPORTED bool +mlirGPUObjectAttrHasKernels(MlirAttribute mlirObjectAttr); + +MLIR_CAPI_EXPORTED MlirAttribute +mlirGPUObjectAttrGetKernels(MlirAttribute mlirObjectAttr); + #ifdef __cplusplus } #endif diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td index 6659f4a2c58e8..07879a0dab07f 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td +++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td @@ -16,6 +16,155 @@ include "mlir/Dialect/GPU/IR/GPUBase.td" include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" +//===----------------------------------------------------------------------===// +// GPU kernel metadata attribute +//===----------------------------------------------------------------------===// + +def GPU_KernelMetadataAttr : GPU_Attr<"KernelMetadata", "kernel_metadata"> { + let description = [{ + GPU attribute for storing metadata related to a compiled kernel. The + attribute contains the name and arguments type of the kernel. + + The attribute also contains optional parameters for storing the arguments + attributes as well as a dictionary for additional metadata, like occupancy + information or other function attributes. + + Note: The `arg_attrs` parameter is expected to follow all the constraints + imposed by the `mlir::FunctionOpInterface` interface. + + Examples: + ```mlir + #gpu.kernel_metadata<@kernel1, (i32) -> (), arg_attrs = [...], metadata = {reg_count = 255, ...}> + #gpu.kernel_metadata<@kernel2, (i32, f64) -> ()> + ``` + }]; + let parameters = (ins + "StringAttr":$name, + "Type":$function_type, + OptionalParameter<"ArrayAttr", "arguments attributes">:$arg_attrs, + OptionalParameter<"DictionaryAttr", "metadata dictionary">:$metadata + ); + let assemblyFormat = [{ + `<` $name `,` $function_type (`,` struct($arg_attrs, $metadata)^)? `>` + }]; + let builders = [ + AttrBuilderWithInferredContext<(ins "StringAttr":$name, + "Type":$functionType, + CArg<"ArrayAttr", "nullptr">:$argAttrs, + CArg<"DictionaryAttr", + "nullptr">:$metadata), [{ + assert(name && "invalid name"); + return $_get(name.getContext(), name, functionType, argAttrs, metadata); + }]>, + AttrBuilderWithInferredContext<(ins "FunctionOpInterface":$kernel, + CArg<"DictionaryAttr", + "nullptr">:$metadata)> + ]; + let genVerifyDecl = 1; + let extraClassDeclaration = [{ + /// Compare two kernels based on the name. + bool operator<(const KernelMetadataAttr& other) const { + return getName().getValue() < other.getName().getValue(); + } + + /// Returns the metadata attribute corresponding to `key` or `nullptr` + /// if missing. + Attribute getAttr(StringRef key) const { + DictionaryAttr attrs = getMetadata(); + return attrs ? attrs.get(key) : nullptr; + } + template + ConcreteAttr getAttr(StringRef key) const { + return llvm::dyn_cast_or_null(getAttr(key)); + } + Attribute getAttr(StringAttr key) const { + DictionaryAttr attrs = getMetadata(); + return attrs ? attrs.get(key) : nullptr; + } + template + ConcreteAttr getAttr(StringAttr key) const { + return llvm::dyn_cast_or_null(getAttr(key)); + } + + /// Returns the attribute dictionary at position `index`. + DictionaryAttr getArgAttrDict(unsigned index) { + ArrayAttr argArray = getArgAttrs(); + return argArray ? llvm::cast(argArray[index]) : nullptr; + } + + /// Return the specified attribute, if present, for the argument at 'index', + /// null otherwise. + Attribute getArgAttr(unsigned index, StringAttr name) { + DictionaryAttr argDict = getArgAttrDict(index); + return argDict ? argDict.get(name) : nullptr; + } + Attribute getArgAttr(unsigned index, StringRef name) { + DictionaryAttr argDict = getArgAttrDict(index); + return argDict ? argDict.get(name) : nullptr; + } + + /// Returns a new KernelMetadataAttr that contains `attrs` in the metadata dictionary. + KernelMetadataAttr appendMetadata(ArrayRef attrs) const; + }]; +} + +//===----------------------------------------------------------------------===// +// GPU kernel table attribute +//===----------------------------------------------------------------------===// + +def GPU_KernelTableAttr : GPU_Attr<"KernelTable", "kernel_table"> { + let description = [{ + GPU attribute representing a list of `#gpu.kernel_metadata` attributes. This + attribute supports searching kernels by name. All kernels in the table must + have an unique name. + + Examples: + ```mlir + // Empty table. + #gpu.kernel_table<> + + // Table with a single kernel. + #gpu.kernel_table<[#gpu.kernel_metadata () >]> + + // Table with multiple kernels. + #gpu.kernel_table<[ + #gpu.kernel_metadata<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>, + #gpu.kernel_metadata<"kernel1", (i32) -> ()> + ]> + ``` + }]; + let parameters = (ins + OptionalArrayRefParameter<"KernelMetadataAttr", "array of kernels">:$kernel_table + ); + let assemblyFormat = [{ + `<` (`[` qualified($kernel_table)^ `]`)? `>` + }]; + let builders = [ + AttrBuilder<(ins "ArrayRef":$kernels, + CArg<"bool", "false">:$isSorted)> + ]; + let skipDefaultBuilders = 1; + let genVerifyDecl = 1; + let extraClassDeclaration = [{ + llvm::ArrayRef::iterator begin() const { + return getKernelTable().begin(); + } + llvm::ArrayRef::iterator end() const { + return getKernelTable().end(); + } + size_t size() const { + return getKernelTable().size(); + } + bool empty() const { + return getKernelTable().empty(); + } + + /// Returns the kernel with name `key` or `nullptr` if not present. + KernelMetadataAttr lookup(StringRef key) const; + KernelMetadataAttr lookup(StringAttr key) const; + }]; +} + //===----------------------------------------------------------------------===// // GPU object attribute. //===----------------------------------------------------------------------===// @@ -36,8 +185,9 @@ def GPU_CompilationTargetEnum : GPU_I32Enum< def GPU_ObjectAttr : GPU_Attr<"Object", "object"> { let description = [{ A GPU object attribute glues together a GPU target, the object kind, a - binary string with the object, and the object properties, encapsulating how - the object was generated and its properties with the object itself. + binary string with the object, the object properties, and kernel metadata, + encapsulating how the object was generated and its properties with the + object itself. There are four object formats: 1. `Offload`: represents generic objects not described by the other three @@ -55,6 +205,10 @@ def GPU_ObjectAttr : GPU_Attr<"Object", "object"> { Object properties are specified through the `properties` dictionary attribute and can be used to define additional information. + + Kernel metadata is specified through the `kernels` parameter, and can be + used to specify additional information on a kernel by kernel basis. + The target attribute must implement or promise the `TargetAttrInterface` interface. @@ -63,16 +217,29 @@ def GPU_ObjectAttr : GPU_Attr<"Object", "object"> { #gpu.object<#nvvm.target, properties = {O = 3 : i32}, assembly = "..."> // An assembly object with additional properties. #gpu.object<#rocdl.target, bin = "..."> // A binary object. #gpu.object<#nvvm.target, "..."> // A fatbin object. + #gpu.object<#nvvm.target, kernels = #gpu.kernel_table<...>, "..."> // An object with a kernel table. ``` }]; let parameters = (ins "Attribute":$target, DefaultValuedParameter<"CompilationTarget", "CompilationTarget::Fatbin">:$format, "StringAttr":$object, - OptionalParameter<"DictionaryAttr">:$properties + OptionalParameter<"DictionaryAttr">:$properties, + OptionalParameter<"KernelTableAttr">:$kernels ); + let builders = [ + AttrBuilderWithInferredContext<(ins "Attribute":$target, + "CompilationTarget":$format, + "StringAttr":$object, + CArg<"DictionaryAttr", "nullptr">:$properties, + CArg<"KernelTableAttr", "nullptr">:$kernels), [{ + assert(target && "invalid target"); + return $_get(target.getContext(), target, format, object, properties, kernels); + }]> + ]; let assemblyFormat = [{ `<` - $target `,` (`properties` `=` $properties ^ `,`)? + $target `,` (`properties` `=` $properties^ `,`)? + (`kernels` `=` $kernels^ `,`)? custom($format, $object) `>` }]; diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h index 3c637a01b0e3b..3d2174c144815 100644 --- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h +++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h @@ -14,6 +14,7 @@ #define MLIR_TARGET_LLVM_ROCDL_UTILS_H #include "mlir/Dialect/GPU/IR/CompilationInterfaces.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Support/LLVM.h" #include "mlir/Target/LLVM/ModuleToObject.h" @@ -107,6 +108,20 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// AMD GCN libraries to use when linking, the default is using none. AMDGCNLibraries deviceLibs = AMDGCNLibraries::None; }; + +/// Returns a map containing the `amdhsa.kernels` ELF metadata for each of the +/// kernels in the binary, or `std::nullopt` if the metadata couldn't be +/// retrieved. The map associates the name of the kernel with the list of named +/// attributes found in `amdhsa.kernels`. For more information on the ELF +/// metadata see: https://llvm.org/docs/AMDGPUUsage.html#amdhsa +std::optional> +getAMDHSAKernelsELFMetadata(Builder &builder, ArrayRef elfData); + +/// Returns a `#gpu.kernel_table` containing kernel metadata for each of the +/// kernels in `gpuModule`. If `elfData` is valid, then the `amdhsa.kernels` ELF +/// metadata will be added to the `#gpu.kernel_table`. +gpu::KernelTableAttr getKernelMetadata(Operation *gpuModule, + ArrayRef elfData = {}); } // namespace ROCDL } // namespace mlir diff --git a/mlir/lib/Bindings/Python/DialectGPU.cpp b/mlir/lib/Bindings/Python/DialectGPU.cpp index a9e339b50dabc..560a54bcd1591 100644 --- a/mlir/lib/Bindings/Python/DialectGPU.cpp +++ b/mlir/lib/Bindings/Python/DialectGPU.cpp @@ -48,17 +48,21 @@ PYBIND11_MODULE(_mlirDialectsGPU, m) { .def_classmethod( "get", [](py::object cls, MlirAttribute target, uint32_t format, - py::bytes object, std::optional mlirObjectProps) { + py::bytes object, std::optional mlirObjectProps, + std::optional mlirKernelsAttr) { py::buffer_info info(py::buffer(object).request()); MlirStringRef objectStrRef = mlirStringRefCreate(static_cast(info.ptr), info.size); - return cls(mlirGPUObjectAttrGet( + return cls(mlirGPUObjectAttrGetWithKernels( mlirAttributeGetContext(target), target, format, objectStrRef, mlirObjectProps.has_value() ? *mlirObjectProps + : MlirAttribute{nullptr}, + mlirKernelsAttr.has_value() ? *mlirKernelsAttr : MlirAttribute{nullptr})); }, "cls"_a, "target"_a, "format"_a, "object"_a, - "properties"_a = py::none(), "Gets a gpu.object from parameters.") + "properties"_a = py::none(), "kernels"_a = py::none(), + "Gets a gpu.object from parameters.") .def_property_readonly( "target", [](MlirAttribute self) { return mlirGPUObjectAttrGetTarget(self); }) @@ -71,9 +75,16 @@ PYBIND11_MODULE(_mlirDialectsGPU, m) { MlirStringRef stringRef = mlirGPUObjectAttrGetObject(self); return py::bytes(stringRef.data, stringRef.length); }) - .def_property_readonly("properties", [](MlirAttribute self) { - if (mlirGPUObjectAttrHasProperties(self)) - return py::cast(mlirGPUObjectAttrGetProperties(self)); + .def_property_readonly("properties", + [](MlirAttribute self) { + if (mlirGPUObjectAttrHasProperties(self)) + return py::cast( + mlirGPUObjectAttrGetProperties(self)); + return py::none().cast(); + }) + .def_property_readonly("kernels", [](MlirAttribute self) { + if (mlirGPUObjectAttrHasKernels(self)) + return py::cast(mlirGPUObjectAttrGetKernels(self)); return py::none().cast(); }); } diff --git a/mlir/lib/CAPI/Dialect/GPU.cpp b/mlir/lib/CAPI/Dialect/GPU.cpp index 0acebb2300429..e4796ed1499ea 100644 --- a/mlir/lib/CAPI/Dialect/GPU.cpp +++ b/mlir/lib/CAPI/Dialect/GPU.cpp @@ -43,9 +43,28 @@ MlirAttribute mlirGPUObjectAttrGet(MlirContext mlirCtx, MlirAttribute target, DictionaryAttr objectProps; if (mlirObjectProps.ptr != nullptr) objectProps = llvm::cast(unwrap(mlirObjectProps)); - return wrap(gpu::ObjectAttr::get(ctx, unwrap(target), - static_cast(format), - StringAttr::get(ctx, object), objectProps)); + return wrap(gpu::ObjectAttr::get( + ctx, unwrap(target), static_cast(format), + StringAttr::get(ctx, object), objectProps, nullptr)); +} + +MlirAttribute mlirGPUObjectAttrGetWithKernels(MlirContext mlirCtx, + MlirAttribute target, + uint32_t format, + MlirStringRef objectStrRef, + MlirAttribute mlirObjectProps, + MlirAttribute mlirKernelsAttr) { + MLIRContext *ctx = unwrap(mlirCtx); + llvm::StringRef object = unwrap(objectStrRef); + DictionaryAttr objectProps; + if (mlirObjectProps.ptr != nullptr) + objectProps = llvm::cast(unwrap(mlirObjectProps)); + gpu::KernelTableAttr kernels; + if (mlirKernelsAttr.ptr != nullptr) + kernels = llvm::cast(unwrap(mlirKernelsAttr)); + return wrap(gpu::ObjectAttr::get( + ctx, unwrap(target), static_cast(format), + StringAttr::get(ctx, object), objectProps, kernels)); } MlirAttribute mlirGPUObjectAttrGetTarget(MlirAttribute mlirObjectAttr) { @@ -78,3 +97,15 @@ MlirAttribute mlirGPUObjectAttrGetProperties(MlirAttribute mlirObjectAttr) { llvm::cast(unwrap(mlirObjectAttr)); return wrap(objectAttr.getProperties()); } + +bool mlirGPUObjectAttrHasKernels(MlirAttribute mlirObjectAttr) { + gpu::ObjectAttr objectAttr = + llvm::cast(unwrap(mlirObjectAttr)); + return objectAttr.getKernels() != nullptr; +} + +MlirAttribute mlirGPUObjectAttrGetKernels(MlirAttribute mlirObjectAttr) { + gpu::ObjectAttr objectAttr = + llvm::cast(unwrap(mlirObjectAttr)); + return wrap(objectAttr.getKernels()); +} diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index a59952228ef6e..e45ba7838b453 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -2091,7 +2091,8 @@ void AllocOp::getCanonicalizationPatterns(RewritePatternSet &results, LogicalResult ObjectAttr::verify(function_ref emitError, Attribute target, CompilationTarget format, - StringAttr object, DictionaryAttr properties) { + StringAttr object, DictionaryAttr properties, + KernelTableAttr kernels) { if (!target) return emitError() << "the target attribute cannot be null"; if (target.hasPromiseOrImplementsInterface()) @@ -2177,6 +2178,113 @@ LogicalResult gpu::DynamicSharedMemoryOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// GPU KernelMetadataAttr +//===----------------------------------------------------------------------===// + +KernelMetadataAttr KernelMetadataAttr::get(FunctionOpInterface kernel, + DictionaryAttr metadata) { + assert(kernel && "invalid kernel"); + return get(kernel.getNameAttr(), kernel.getFunctionType(), + kernel.getAllArgAttrs(), metadata); +} + +KernelMetadataAttr +KernelMetadataAttr::getChecked(function_ref emitError, + FunctionOpInterface kernel, + DictionaryAttr metadata) { + assert(kernel && "invalid kernel"); + return getChecked(emitError, kernel.getNameAttr(), kernel.getFunctionType(), + kernel.getAllArgAttrs(), metadata); +} + +KernelMetadataAttr +KernelMetadataAttr::appendMetadata(ArrayRef attrs) const { + if (attrs.empty()) + return *this; + NamedAttrList attrList; + if (DictionaryAttr dict = getMetadata()) + attrList.append(dict); + attrList.append(attrs); + return KernelMetadataAttr::get(getName(), getFunctionType(), getArgAttrs(), + attrList.getDictionary(getContext())); +} + +LogicalResult +KernelMetadataAttr::verify(function_ref emitError, + StringAttr name, Type functionType, + ArrayAttr argAttrs, DictionaryAttr metadata) { + if (name.empty()) + return emitError() << "the kernel name can't be empty"; + if (argAttrs) { + if (llvm::any_of(argAttrs, [](Attribute attr) { + return !llvm::isa(attr); + })) + return emitError() + << "all attributes in the array must be a dictionary attribute"; + } + return success(); +} + +//===----------------------------------------------------------------------===// +// GPU KernelTableAttr +//===----------------------------------------------------------------------===// + +KernelTableAttr KernelTableAttr::get(MLIRContext *context, + ArrayRef kernels, + bool isSorted) { + // Note that `is_sorted` is always only invoked once even with assertions ON. + assert((!isSorted || llvm::is_sorted(kernels)) && + "expected a sorted kernel array"); + // Immediately return the attribute if the array is sorted. + if (isSorted || llvm::is_sorted(kernels)) + return Base::get(context, kernels); + // Sort the array. + SmallVector kernelsTmp(kernels); + llvm::array_pod_sort(kernelsTmp.begin(), kernelsTmp.end()); + return Base::get(context, kernelsTmp); +} + +KernelTableAttr KernelTableAttr::getChecked( + function_ref emitError, MLIRContext *context, + ArrayRef kernels, bool isSorted) { + // Note that `is_sorted` is always only invoked once even with assertions ON. + assert((!isSorted || llvm::is_sorted(kernels)) && + "expected a sorted kernel array"); + // Immediately return the attribute if the array is sorted. + if (isSorted || llvm::is_sorted(kernels)) + return Base::getChecked(emitError, context, kernels); + // Sort the array. + SmallVector kernelsTmp(kernels); + llvm::array_pod_sort(kernelsTmp.begin(), kernelsTmp.end()); + return Base::getChecked(emitError, context, kernelsTmp); +} + +LogicalResult +KernelTableAttr::verify(function_ref emitError, + ArrayRef kernels) { + if (kernels.size() < 2) + return success(); + // Check that the kernels are uniquely named. + if (std::adjacent_find(kernels.begin(), kernels.end(), + [](KernelMetadataAttr l, KernelMetadataAttr r) { + return l.getName() == r.getName(); + }) != kernels.end()) { + return emitError() << "expected all kernels to be uniquely named"; + } + return success(); +} + +KernelMetadataAttr KernelTableAttr::lookup(StringRef key) const { + auto [iterator, found] = impl::findAttrSorted(begin(), end(), key); + return found ? *iterator : KernelMetadataAttr(); +} + +KernelMetadataAttr KernelTableAttr::lookup(StringAttr key) const { + auto [iterator, found] = impl::findAttrSorted(begin(), end(), key); + return found ? *iterator : KernelMetadataAttr(); +} + //===----------------------------------------------------------------------===// // GPU target options //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt index 93dc5ff9d35b7..bc14c568e46be 100644 --- a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -110,10 +110,12 @@ endif() add_mlir_dialect_library(MLIRROCDLTarget ROCDL/Target.cpp + ROCDL/Utils.cpp OBJECT LINK_COMPONENTS + FrontendOffloading MCParser ${AMDGPU_LIBS} diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp index a75b7f92ed8dc..806c405ac17df 100644 --- a/mlir/lib/Target/LLVM/NVVM/Target.cpp +++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp @@ -604,5 +604,5 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module, return builder.getAttr( attribute, format, builder.getStringAttr(StringRef(object.data(), object.size())), - objectProps); + objectProps, /*kernels=*/nullptr); } diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index e32a0c7e14e85..d8a79a7e80d64 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -506,13 +506,15 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module, gpu::CompilationTarget format = options.getCompilationTarget(); // If format is `fatbin` transform it to binary as `fatbin` is not yet // supported. - if (format > gpu::CompilationTarget::Binary) + gpu::KernelTableAttr kernels; + if (format > gpu::CompilationTarget::Binary) { format = gpu::CompilationTarget::Binary; - + kernels = ROCDL::getKernelMetadata(module, object); + } DictionaryAttr properties{}; Builder builder(attribute.getContext()); - return builder.getAttr( - attribute, format, - builder.getStringAttr(StringRef(object.data(), object.size())), - properties); + StringAttr objectStr = + builder.getStringAttr(StringRef(object.data(), object.size())); + return builder.getAttr(attribute, format, objectStr, + properties, kernels); } diff --git a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp new file mode 100644 index 0000000000000..04b1b22279e5d --- /dev/null +++ b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp @@ -0,0 +1,87 @@ +//===- Utils.cpp - MLIR ROCDL target utils ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This files defines ROCDL target related utility classes and functions. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Target/LLVM/ROCDL/Utils.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" + +#include "llvm/ADT/StringMap.h" +#include "llvm/Frontend/Offloading/Utility.h" + +using namespace mlir; +using namespace mlir::ROCDL; + +std::optional> +mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder, + ArrayRef elfData) { + uint16_t elfABIVersion; + llvm::StringMap kernels; + llvm::MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()), + "buffer"); + // Get the metadata. + llvm::Error error = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage( + buffer, kernels, elfABIVersion); + // Return `nullopt` if the metadata couldn't be retrieved. + if (error) { + llvm::consumeError(std::move(error)); + return std::nullopt; + } + // Helper lambda for converting values. + auto getI32Array = [&builder](const uint32_t *array) { + return builder.getDenseI32ArrayAttr({static_cast(array[0]), + static_cast(array[1]), + static_cast(array[2])}); + }; + DenseMap kernelMD; + for (const auto &[name, kernel] : kernels) { + NamedAttrList attrs; + // Add kernel metadata. + attrs.append("agpr_count", builder.getI64IntegerAttr(kernel.AGPRCount)); + attrs.append("sgpr_count", builder.getI64IntegerAttr(kernel.SGPRCount)); + attrs.append("vgpr_count", builder.getI64IntegerAttr(kernel.VGPRCount)); + attrs.append("sgpr_spill_count", + builder.getI64IntegerAttr(kernel.SGPRSpillCount)); + attrs.append("vgpr_spill_count", + builder.getI64IntegerAttr(kernel.VGPRSpillCount)); + attrs.append("wavefront_size", + builder.getI64IntegerAttr(kernel.WavefrontSize)); + attrs.append("max_flat_workgroup_size", + builder.getI64IntegerAttr(kernel.MaxFlatWorkgroupSize)); + attrs.append("group_segment_fixed_size", + builder.getI64IntegerAttr(kernel.GroupSegmentList)); + attrs.append("private_segment_fixed_size", + builder.getI64IntegerAttr(kernel.PrivateSegmentSize)); + attrs.append("reqd_workgroup_size", + getI32Array(kernel.RequestedWorkgroupSize)); + attrs.append("workgroup_size_hint", getI32Array(kernel.WorkgroupSizeHint)); + kernelMD[builder.getStringAttr(name)] = std::move(attrs); + } + return std::move(kernelMD); +} + +gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule, + ArrayRef elfData) { + auto module = cast(gpuModule); + Builder builder(module.getContext()); + SmallVector kernels; + std::optional> mdMapOrNull = + getAMDHSAKernelsELFMetadata(builder, elfData); + for (auto funcOp : module.getBody()->getOps()) { + if (!funcOp->getDiscardableAttr("rocdl.kernel")) + continue; + kernels.push_back(gpu::KernelMetadataAttr::get( + funcOp, mdMapOrNull ? builder.getDictionaryAttr( + mdMapOrNull->lookup(funcOp.getNameAttr())) + : nullptr)); + } + return gpu::KernelTableAttr::get(gpuModule->getContext(), kernels); +} diff --git a/mlir/lib/Target/SPIRV/Target.cpp b/mlir/lib/Target/SPIRV/Target.cpp index d48548bf9709c..dd128e254aa0d 100644 --- a/mlir/lib/Target/SPIRV/Target.cpp +++ b/mlir/lib/Target/SPIRV/Target.cpp @@ -98,5 +98,5 @@ SPIRVTargetAttrImpl::createObject(Attribute attribute, Operation *module, return builder.getAttr( attribute, format, builder.getStringAttr(StringRef(object.data(), object.size())), - objectProps); + objectProps, /*kernels=*/nullptr); } diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir index 20c1c4cf8a2d0..fd7618020b5d8 100644 --- a/mlir/test/Dialect/GPU/invalid.mlir +++ b/mlir/test/Dialect/GPU/invalid.mlir @@ -848,3 +848,15 @@ module attributes {gpu.container_module} { gpu.module @kernel <> { } } + +// ----- + +gpu.binary @binary [#gpu.object<#rocdl.target, + // expected-error@+1{{expected all kernels to be uniquely named}} + kernels = #gpu.kernel_table<[ + #gpu.kernel_metadata<"kernel", (i32) -> ()>, + #gpu.kernel_metadata<"kernel", (i32, f32) -> (), metadata = {sgpr_count = 255}> + // expected-error@below{{failed to parse GPU_ObjectAttr parameter 'kernels' which is to be a `KernelTableAttr`}} + ]>, + bin = "BLOB"> + ] diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir index ba7897f4e80cb..b9c0a0e79e8f2 100644 --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -441,3 +441,26 @@ gpu.module @module_with_two_target [#nvvm.target, #rocdl.target gpu.module @module_with_offload_handler <#gpu.select_object<0>> [#nvvm.target] { } + +// Test kernel attributes +gpu.binary @kernel_attrs_1 [ + #gpu.object<#rocdl.target, + kernels = #gpu.kernel_table<[ + #gpu.kernel_metadata<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>, + #gpu.kernel_metadata<"kernel1", (i32) -> (), arg_attrs = [{llvm.read_only}]> + ]>, + bin = "BLOB"> + ] + +// Verify the kernels are sorted +// CHECK-LABEL: gpu.binary @kernel_attrs_2 +gpu.binary @kernel_attrs_2 [ + // CHECK: [#gpu.kernel_metadata<"a_kernel", () -> ()>, #gpu.kernel_metadata<"m_kernel", () -> ()>, #gpu.kernel_metadata<"z_kernel", () -> ()>] + #gpu.object<#rocdl.target, + kernels = #gpu.kernel_table<[ + #gpu.kernel_metadata<"z_kernel", () -> ()>, + #gpu.kernel_metadata<"m_kernel", () -> ()>, + #gpu.kernel_metadata<"a_kernel", () -> ()> + ]>, + bin = "BLOB"> + ] diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py index aded35b04aa1e..26ee9f34cb332 100644 --- a/mlir/test/python/dialects/gpu/dialect.py +++ b/mlir/test/python/dialects/gpu/dialect.py @@ -55,3 +55,12 @@ def testObjectAttr(): # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.0\0A.target sm_50"> print(o) assert o.object == object + + object = b"BC\xc0\xde5\x14\x00\x00\x05\x00\x00\x00b\x0c0$MY\xbef" + kernelTable = Attribute.parse( + '#gpu.kernel_table<[#gpu.kernel_metadata<"kernel", () -> ()>]>' + ) + o = gpu.ObjectAttr.get(target, format, object, kernels=kernelTable) + # CHECK: #gpu.object<#nvvm.target, kernels = <[#gpu.kernel_metadata<"kernel", () -> ()>]>, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf"> + print(o) + assert o.kernels == kernelTable diff --git a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp index 33291bc4bcaed..43fa3d850d9e2 100644 --- a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp +++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp @@ -158,3 +158,69 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToBinary)) { ASSERT_FALSE(object->empty()); } } + +// Test ROCDL metadata. +TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(GetELFMetadata)) { + if (!hasROCMTools()) + GTEST_SKIP() << "ROCm installation not found, skipping test."; + + MLIRContext context(registry); + + // MLIR module used for the tests. + const std::string moduleStr = R"mlir( + gpu.module @rocdl_test { + llvm.func @rocdl_kernel_1(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} { + llvm.return + } + llvm.func @rocdl_kernel_0(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} { + llvm.return + } + llvm.func @rocdl_kernel_2(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} { + llvm.return + } + llvm.func @a_kernel(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} { + llvm.return + } + })mlir"; + + OwningOpRef module = + parseSourceString(moduleStr, &context); + ASSERT_TRUE(!!module); + + // Create a ROCDL target. + ROCDL::ROCDLTargetAttr target = ROCDL::ROCDLTargetAttr::get(&context); + + // Serialize the module. + auto serializer = dyn_cast(target); + ASSERT_TRUE(!!serializer); + gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Binary); + for (auto gpuModule : (*module).getBody()->getOps()) { + std::optional> object = + serializer.serializeToObject(gpuModule, options); + // Check that the serializer was successful. + ASSERT_TRUE(object != std::nullopt); + ASSERT_FALSE(object->empty()); + if (!object) + continue; + // Get the metadata. + gpu::KernelTableAttr metadata = + ROCDL::getKernelMetadata(gpuModule, *object); + ASSERT_TRUE(metadata != nullptr); + // There should be 4 kernels. + ASSERT_TRUE(metadata.size() == 4); + // Check that the lookup method returns finds the kernel. + ASSERT_TRUE(metadata.lookup("a_kernel") != nullptr); + ASSERT_TRUE(metadata.lookup("rocdl_kernel_0") != nullptr); + // Check that the kernel doesn't exist. + ASSERT_TRUE(metadata.lookup("not_existent_kernel") == nullptr); + // Test the `KernelMetadataAttr` iterators. + for (gpu::KernelMetadataAttr kernel : metadata) { + // Check that the ELF metadata is present. + ASSERT_TRUE(kernel.getMetadata() != nullptr); + // Verify that `sgpr_count` is present and it is an integer attribute. + ASSERT_TRUE(kernel.getAttr("sgpr_count") != nullptr); + // Verify that `vgpr_count` is present and it is an integer attribute. + ASSERT_TRUE(kernel.getAttr("vgpr_count") != nullptr); + } + } +} diff --git a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp index 37dbfe6203687..aaa281e07933b 100644 --- a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp +++ b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp @@ -116,7 +116,7 @@ TargetAttrImpl::createObject(Attribute attribute, Operation *module, module->getContext(), attribute, gpu::CompilationTarget::Offload, StringAttr::get(module->getContext(), StringRef(object.data(), object.size())), - module->getAttrDictionary()); + module->getAttrDictionary(), /*kernels=*/nullptr); } TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(TargetAttrAPI)) {