From 617b301558597940183304f004426232f13ec12f Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Wed, 12 Jun 2024 20:49:09 +0000 Subject: [PATCH 01/15] move to gpu --- mlir/include/mlir-c/Dialect/GPU.h | 3 +- .../mlir/Dialect/GPU/IR/CompilationAttrs.td | 147 +++++++++++- mlir/include/mlir/Target/LLVM/ROCDL/Utils.h | 7 + mlir/lib/CAPI/Dialect/GPU.cpp | 12 +- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 37 ++- mlir/lib/Target/LLVM/CMakeLists.txt | 2 + mlir/lib/Target/LLVM/NVVM/Target.cpp | 2 +- mlir/lib/Target/LLVM/ROCDL/Target.cpp | 6 +- mlir/lib/Target/LLVM/ROCDL/Utils.cpp | 226 ++++++++++++++++++ mlir/lib/Target/SPIRV/Target.cpp | 2 +- mlir/test/Dialect/GPU/ops.mlir | 6 + .../Target/LLVM/SerializeROCDLTarget.cpp | 45 ++++ 12 files changed, 483 insertions(+), 12 deletions(-) create mode 100644 mlir/lib/Target/LLVM/ROCDL/Utils.cpp diff --git a/mlir/include/mlir-c/Dialect/GPU.h b/mlir/include/mlir-c/Dialect/GPU.h index c42ff61f9592c..0c2a603ed9b89 100644 --- a/mlir/include/mlir-c/Dialect/GPU.h +++ b/mlir/include/mlir-c/Dialect/GPU.h @@ -35,7 +35,8 @@ MLIR_CAPI_EXPORTED bool mlirAttributeIsAGPUObjectAttr(MlirAttribute attr); MLIR_CAPI_EXPORTED MlirAttribute mlirGPUObjectAttrGet(MlirContext mlirCtx, MlirAttribute target, uint32_t format, - MlirStringRef objectStrRef, MlirAttribute mlirObjectProps); + MlirStringRef objectStrRef, MlirAttribute mlirObjectProps, + MlirAttribute mlirKernelsAttr); MLIR_CAPI_EXPORTED MlirAttribute mlirGPUObjectAttrGetTarget(MlirAttribute mlirObjectAttr); diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td index 6659f4a2c58e8..f4037b55c85b4 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td +++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td @@ -16,6 +16,136 @@ include "mlir/Dialect/GPU/IR/GPUBase.td" include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" +//===----------------------------------------------------------------------===// +// GPU kernel attribute +//===----------------------------------------------------------------------===// + +def GPU_KernelAttr : GPU_Attr<"Kernel", "kernel"> { + let description = [{ + GPU attribute for storing metadata related to a compiled kernel. It + contains the attribute dictionary of the LLVM function used to generate the + kernel, as well as an optional dictionary for additional metadata, like + occupancy information. + + Examples: + ```mlir + #gpu.kernel<{sym_name = "test_fusion__part_0", ...}, + metadata = {reg_count = 255, ...}> + ``` + }]; + let parameters = (ins + "DictionaryAttr":$func_attrs, + OptionalParameter<"DictionaryAttr", "metadata dictionary">:$metadata + ); + let assemblyFormat = [{ + `<` $func_attrs (`,` `metadata` `=` $metadata^ )? `>` + }]; + let builders = [ + AttrBuilderWithInferredContext<(ins "DictionaryAttr":$funcAttrs, + CArg<"DictionaryAttr", + "nullptr">:$metadata), [{ + assert(funcAttrs && "invalid function attributes dictionary"); + return $_get(funcAttrs.getContext(), funcAttrs, metadata); + }]>, + AttrBuilderWithInferredContext<(ins "Operation*":$kernel, + CArg<"DictionaryAttr", + "nullptr">:$metadata)> + ]; + let extraClassDeclaration = [{ + /// Returns the function attribute corresponding to key or nullptr if missing. + Attribute getAttr(StringRef key) const { + return getFuncAttrs().get(key); + } + template + ConcreteAttr getAttr(StringRef key) const { + return llvm::dyn_cast_or_null(getAttr(key)); + } + Attribute getAttr(StringAttr key) const; + template + ConcreteAttr getAttr(StringAttr key) const { + return llvm::dyn_cast_or_null(getAttr(key)); + } + + /// Returns the name of the kernel. + StringAttr getName() const { + return getAttr("sym_name"); + } + + /// Returns the metadta attribute corresponding to key or nullptr if missing. + Attribute getMDAttr(StringRef key) const { + if (DictionaryAttr attrs = getMetadata()) + return attrs.get(key); + return nullptr; + } + template + ConcreteAttr getMDAttr(StringRef key) const { + return llvm::dyn_cast_or_null(getMDAttr(key)); + } + Attribute getMDAttr(StringAttr key) const; + template + ConcreteAttr getMDAttr(StringAttr key) const { + return llvm::dyn_cast_or_null(getMDAttr(key)); + } + + /// Helper function for appending metadata to a kernel attribute. + KernelAttr appendMetadata(ArrayRef attrs) const; + }]; +} + +//===----------------------------------------------------------------------===// +// GPU kernel table attribute +//===----------------------------------------------------------------------===// + +def GPU_KernelTableAttr : GPU_Attr<"KernelTable", "kernel_table"> { + let description = [{ + GPU attribute representing a table of kernels metadata. All the attributes + in the dictionary must be of type `#gpu.kernel`. + + Examples: + ```mlir + #gpu.kernel_table<{kernel0 = #gpu.kernel<...>}> + ``` + }]; + let parameters = (ins + "DictionaryAttr":$kernel_table + ); + let assemblyFormat = [{ + `<` $kernel_table `>` + }]; + let builders = [ + AttrBuilderWithInferredContext<(ins "DictionaryAttr":$kernel_table), [{ + assert(kernel_table && "invalid kernel table"); + return $_get(kernel_table.getContext(), kernel_table); + }]> + ]; + let skipDefaultBuilders = 1; + let genVerifyDecl = 1; + let extraClassDeclaration = [{ + /// Helper iterator class for traversing the kernel table. + struct KernelIterator + : llvm::mapped_iterator_base::iterator, + std::pair> { + using llvm::mapped_iterator_base< + KernelIterator, llvm::ArrayRef::iterator, + std::pair>::mapped_iterator_base; + /// Map the iterator to the kernel name and a KernelAttribute. + std::pair mapElement(NamedAttribute attr) const { + return {attr.getName(), llvm::cast(attr.getValue())}; + } + }; + auto begin() const { + return KernelIterator(getKernelTable().begin()); + } + auto end() const { + return KernelIterator(getKernelTable().end()); + } + size_t size() const { + return getKernelTable().size(); + } + }]; +} + //===----------------------------------------------------------------------===// // GPU object attribute. //===----------------------------------------------------------------------===// @@ -63,16 +193,29 @@ def GPU_ObjectAttr : GPU_Attr<"Object", "object"> { #gpu.object<#nvvm.target, properties = {O = 3 : i32}, assembly = "..."> // An assembly object with additional properties. #gpu.object<#rocdl.target, bin = "..."> // A binary object. #gpu.object<#nvvm.target, "..."> // A fatbin object. + #gpu.object<#nvvm.target, kernels = #gpu.kernel_table<...>, "..."> // An object with a kernel table. ``` }]; let parameters = (ins "Attribute":$target, DefaultValuedParameter<"CompilationTarget", "CompilationTarget::Fatbin">:$format, "StringAttr":$object, - OptionalParameter<"DictionaryAttr">:$properties + OptionalParameter<"DictionaryAttr">:$properties, + OptionalParameter<"KernelTableAttr">:$kernels ); + let builders = [ + AttrBuilderWithInferredContext<(ins "Attribute":$target, + "CompilationTarget":$format, + "StringAttr":$object, + CArg<"DictionaryAttr", "nullptr">:$properties, + CArg<"KernelTableAttr", "nullptr">:$kernels), [{ + assert(target && "invalid target"); + return $_get(target.getContext(), target, format, object, properties, kernels); + }]> + ]; let assemblyFormat = [{ `<` - $target `,` (`properties` `=` $properties ^ `,`)? + $target `,` (`properties` `=` $properties^ `,`)? + (`kernels` `=` $kernels^ `,`)? custom($format, $object) `>` }]; diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h index 3c637a01b0e3b..904d60a2a75db 100644 --- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h +++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h @@ -14,6 +14,7 @@ #define MLIR_TARGET_LLVM_ROCDL_UTILS_H #include "mlir/Dialect/GPU/IR/CompilationInterfaces.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "mlir/Support/LLVM.h" #include "mlir/Target/LLVM/ModuleToObject.h" @@ -107,6 +108,12 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { /// AMD GCN libraries to use when linking, the default is using none. AMDGCNLibraries deviceLibs = AMDGCNLibraries::None; }; + +/// Returns a `#gpu.kernel_table` containing kernel metadata for each of the +/// kernels in `gpuModule`. If `elfData` is valid, then the `amdhsa.kernels` ELF +/// metadata will be added to the `#gpu.kernel_table`. +gpu::KernelTableAttr getAMDHSAKernelsMetadata(Operation *gpuModule, + ArrayRef elfData = {}); } // namespace ROCDL } // namespace mlir diff --git a/mlir/lib/CAPI/Dialect/GPU.cpp b/mlir/lib/CAPI/Dialect/GPU.cpp index 0acebb2300429..ffe60658cb2ce 100644 --- a/mlir/lib/CAPI/Dialect/GPU.cpp +++ b/mlir/lib/CAPI/Dialect/GPU.cpp @@ -37,15 +37,19 @@ bool mlirAttributeIsAGPUObjectAttr(MlirAttribute attr) { MlirAttribute mlirGPUObjectAttrGet(MlirContext mlirCtx, MlirAttribute target, uint32_t format, MlirStringRef objectStrRef, - MlirAttribute mlirObjectProps) { + MlirAttribute mlirObjectProps, + MlirAttribute mlirKernelsAttr) { MLIRContext *ctx = unwrap(mlirCtx); llvm::StringRef object = unwrap(objectStrRef); DictionaryAttr objectProps; if (mlirObjectProps.ptr != nullptr) objectProps = llvm::cast(unwrap(mlirObjectProps)); - return wrap(gpu::ObjectAttr::get(ctx, unwrap(target), - static_cast(format), - StringAttr::get(ctx, object), objectProps)); + gpu::KernelTableAttr kernels; + if (mlirKernelsAttr.ptr != nullptr) + kernels = llvm::cast(unwrap(mlirKernelsAttr)); + return wrap(gpu::ObjectAttr::get( + ctx, unwrap(target), static_cast(format), + StringAttr::get(ctx, object), objectProps, kernels)); } MlirAttribute mlirGPUObjectAttrGetTarget(MlirAttribute mlirObjectAttr) { diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index a59952228ef6e..de69465c2ec6f 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -2091,7 +2091,8 @@ void AllocOp::getCanonicalizationPatterns(RewritePatternSet &results, LogicalResult ObjectAttr::verify(function_ref emitError, Attribute target, CompilationTarget format, - StringAttr object, DictionaryAttr properties) { + StringAttr object, DictionaryAttr properties, + KernelTableAttr kernels) { if (!target) return emitError() << "the target attribute cannot be null"; if (target.hasPromiseOrImplementsInterface()) @@ -2177,6 +2178,40 @@ LogicalResult gpu::DynamicSharedMemoryOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// GPU KernelAttr +//===----------------------------------------------------------------------===// + +KernelAttr KernelAttr::get(Operation *kernelOp, DictionaryAttr metadata) { + assert(kernelOp && "invalid kernel"); + return get(kernelOp->getAttrDictionary(), metadata); +} + +KernelAttr KernelAttr::appendMetadata(ArrayRef attrs) const { + if (attrs.empty()) + return *this; + NamedAttrList attrList(attrs); + attrList.append(getMetadata()); + return KernelAttr::get(getFuncAttrs(), attrList.getDictionary(getContext())); +} + +//===----------------------------------------------------------------------===// +// GPU KernelTableAttr +//===----------------------------------------------------------------------===// + +LogicalResult +KernelTableAttr::verify(function_ref emitError, + DictionaryAttr dict) { + if (!dict) + return emitError() << "table cannot be null"; + if (llvm::any_of(dict, [](NamedAttribute attr) { + return !llvm::isa(attr.getValue()); + })) + return emitError() + << "all the dictionary values must be `#gpu.kernel` attributes"; + return success(); +} + //===----------------------------------------------------------------------===// // GPU target options //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt index 93dc5ff9d35b7..4999ce00ba5bc 100644 --- a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -110,11 +110,13 @@ endif() add_mlir_dialect_library(MLIRROCDLTarget ROCDL/Target.cpp + ROCDL/Utils.cpp OBJECT LINK_COMPONENTS MCParser + Object ${AMDGPU_LIBS} LINK_LIBS PUBLIC diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp index a75b7f92ed8dc..7e8e3ff6ed59a 100644 --- a/mlir/lib/Target/LLVM/NVVM/Target.cpp +++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp @@ -604,5 +604,5 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module, return builder.getAttr( attribute, format, builder.getStringAttr(StringRef(object.data(), object.size())), - objectProps); + objectProps, nullptr); } diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index e32a0c7e14e85..47dfb3d63bd4a 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -512,7 +512,9 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module, DictionaryAttr properties{}; Builder builder(attribute.getContext()); return builder.getAttr( - attribute, format, + attribute, + format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary + : format, builder.getStringAttr(StringRef(object.data(), object.size())), - properties); + properties, nullptr); } diff --git a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp new file mode 100644 index 0000000000000..5029293bae2a6 --- /dev/null +++ b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp @@ -0,0 +1,226 @@ +//===- Utils.cpp - MLIR ROCDL target utils ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This files defines ROCDL target related utility classes and functions. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Target/LLVM/ROCDL/Utils.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/ROCDLDialect.h" + +#include "llvm/ADT/StringMap.h" +#include "llvm/BinaryFormat/MsgPackDocument.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/AMDGPUMetadata.h" + +using namespace mlir; +using namespace mlir::ROCDL; + +/// Search the ELF object and return an object containing the `amdhsa.kernels` +/// metadata note. Function adapted from: +/// llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp Also see +/// `amdhsa.kernels`: +/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3-metadata +template +static std::optional +getAMDHSANote(llvm::object::ELFObjectFile &elfObj) { + using namespace llvm; + using namespace llvm::object; + using namespace llvm::ELF; + const ELFFile &elf = elfObj.getELFFile(); + auto secOrErr = elf.sections(); + if (!secOrErr) + return std::nullopt; + ArrayRef sections = *secOrErr; + for (auto section : sections) { + if (section.sh_type != ELF::SHT_NOTE) + continue; + size_t align = std::max(static_cast(section.sh_addralign), 4u); + Error err = Error::success(); + for (const typename ELFT::Note note : elf.notes(section, err)) { + StringRef name = note.getName(); + if (name != "AMDGPU") + continue; + uint32_t type = note.getType(); + if (type != ELF::NT_AMDGPU_METADATA) + continue; + ArrayRef desc = note.getDesc(align); + StringRef msgPackString = + StringRef(reinterpret_cast(desc.data()), desc.size()); + msgpack::Document msgPackDoc; + if (!msgPackDoc.readFromBlob(msgPackString, /*Multi=*/false)) + return std::nullopt; + if (msgPackDoc.getRoot().isScalar()) + return std::nullopt; + return std::optional(std::move(msgPackDoc)); + } + } + return std::nullopt; +} + +/// Return the `amdhsa.kernels` metadata in the ELF object or std::nullopt on +/// failure. This is a helper function that casts a generic `ObjectFile` to the +/// appropiate `ELFObjectFile`. +static std::optional +getAMDHSANote(ArrayRef elfData) { + using namespace llvm; + using namespace llvm::object; + if (elfData.empty()) + return std::nullopt; + MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()), "buffer"); + Expected> objOrErr = + ObjectFile::createELFObjectFile(buffer); + if (!objOrErr || !objOrErr.get()) { + // Drop the error. + llvm::consumeError(objOrErr.takeError()); + return std::nullopt; + } + ObjectFile &elf = *(objOrErr.get()); + std::optional metadata; + if (auto *obj = dyn_cast(&elf)) + metadata = getAMDHSANote(*obj); + else if (auto *obj = dyn_cast(&elf)) + metadata = getAMDHSANote(*obj); + else if (auto *obj = dyn_cast(&elf)) + metadata = getAMDHSANote(*obj); + else if (auto *obj = dyn_cast(&elf)) + metadata = getAMDHSANote(*obj); + return metadata; +} + +/// Utility functions for converting `llvm::msgpack::DocNode` nodes. +static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node); +static Attribute convertNode(Builder &builder, + llvm::msgpack::MapDocNode &node) { + NamedAttrList attrs; + for (auto kv : node) { + if (!kv.first.isString()) + continue; + if (Attribute attr = convertNode(builder, kv.second)) { + auto key = kv.first.getString(); + key.consume_front("."); + key.consume_back("."); + attrs.append(key, attr); + } + } + if (attrs.empty()) + return nullptr; + return builder.getDictionaryAttr(attrs); +} + +static Attribute convertNode(Builder &builder, + llvm::msgpack::ArrayDocNode &node) { + using NodeKind = llvm::msgpack::Type; + // Use `DenseIntAttr` if we know all the attrs are ints. + if (llvm::all_of(node, [](llvm::msgpack::DocNode &n) { + auto kind = n.getKind(); + return kind == NodeKind::Int || kind == NodeKind::UInt; + })) { + SmallVector values; + for (llvm::msgpack::DocNode &n : node) { + auto kind = n.getKind(); + if (kind == NodeKind::Int) + values.push_back(n.getInt()); + else if (kind == NodeKind::UInt) + values.push_back(n.getUInt()); + } + return builder.getDenseI64ArrayAttr(values); + } + // Convert the array. + SmallVector attrs; + for (llvm::msgpack::DocNode &n : node) { + if (Attribute attr = convertNode(builder, n)) + attrs.push_back(attr); + } + if (attrs.empty()) + return nullptr; + return builder.getArrayAttr(attrs); +} + +static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node) { + using namespace llvm::msgpack; + using NodeKind = llvm::msgpack::Type; + switch (node.getKind()) { + case NodeKind::Int: + return builder.getI64IntegerAttr(node.getInt()); + case NodeKind::UInt: + return builder.getI64IntegerAttr(node.getUInt()); + case NodeKind::Boolean: + return builder.getI64IntegerAttr(node.getBool()); + case NodeKind::String: + return builder.getStringAttr(node.getString()); + case NodeKind::Array: + return convertNode(builder, node.getArray()); + case NodeKind::Map: + return convertNode(builder, node.getMap()); + default: + return nullptr; + } +} + +/// The following function should succeed for Code object V3 and above. +static llvm::StringMap getELFMetadata(Builder &builder, + ArrayRef elfData) { + std::optional metadata = getAMDHSANote(elfData); + if (!metadata) + return {}; + llvm::StringMap kernelMD; + llvm::msgpack::DocNode &root = (metadata)->getRoot(); + // Fail if `root` is not a map -it should be for AMD Obj Ver 3. + if (!root.isMap()) + return kernelMD; + auto &kernels = root.getMap()["amdhsa.kernels"]; + // Fail if `amdhsa.kernels` is not an array. + if (!kernels.isArray()) + return kernelMD; + // Convert each of the kernels. + for (auto &kernel : kernels.getArray()) { + if (!kernel.isMap()) + continue; + auto &kernelMap = kernel.getMap(); + auto &name = kernelMap[".name"]; + if (!name.isString()) + continue; + NamedAttrList attrList; + // Convert the kernel properties. + for (auto kv : kernelMap) { + if (!kv.first.isString()) + continue; + StringRef key = kv.first.getString(); + key.consume_front("."); + key.consume_back("."); + if (key == "name") + continue; + if (Attribute attr = convertNode(builder, kv.second)) + attrList.append(key, attr); + } + if (!attrList.empty()) + kernelMD[name.getString()] = builder.getDictionaryAttr(attrList); + } + return kernelMD; +} + +gpu::KernelTableAttr +mlir::ROCDL::getAMDHSAKernelsMetadata(Operation *gpuModule, + ArrayRef elfData) { + auto module = cast(gpuModule); + Builder builder(module.getContext()); + NamedAttrList moduleAttrs; + llvm::StringMap mdMap = getELFMetadata(builder, elfData); + for (auto funcOp : module.getBody()->getOps()) { + if (!funcOp->getDiscardableAttr("rocdl.kernel")) + continue; + moduleAttrs.append( + funcOp.getName(), + gpu::KernelAttr::get(funcOp, mdMap.lookup(funcOp.getName()))); + } + return gpu::KernelTableAttr::get( + moduleAttrs.getDictionary(module.getContext())); +} diff --git a/mlir/lib/Target/SPIRV/Target.cpp b/mlir/lib/Target/SPIRV/Target.cpp index d48548bf9709c..e75966a20db45 100644 --- a/mlir/lib/Target/SPIRV/Target.cpp +++ b/mlir/lib/Target/SPIRV/Target.cpp @@ -98,5 +98,5 @@ SPIRVTargetAttrImpl::createObject(Attribute attribute, Operation *module, return builder.getAttr( attribute, format, builder.getStringAttr(StringRef(object.data(), object.size())), - objectProps); + objectProps, nullptr); } diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir index ba7897f4e80cb..692ef2a5e3bef 100644 --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -441,3 +441,9 @@ gpu.module @module_with_two_target [#nvvm.target, #rocdl.target gpu.module @module_with_offload_handler <#gpu.select_object<0>> [#nvvm.target] { } + + +gpu.binary @binary [#gpu.object<#rocdl.target, kernels = #gpu.kernel_table<{ + kernel0 = #gpu.kernel<{sym_name = "kernel0"}, metadata = {sgpr_count = 255}>, + kernel1 = #gpu.kernel<{sym_name = "kernel1"}> + }> , bin = "BLOB">] diff --git a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp index 33291bc4bcaed..3d5c84efb6f4f 100644 --- a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp +++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp @@ -158,3 +158,48 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToBinary)) { ASSERT_FALSE(object->empty()); } } + +// Test ROCDL metadata. +TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(GetELFMetadata)) { + if (!hasROCMTools()) + GTEST_SKIP() << "ROCm installation not found, skipping test."; + + MLIRContext context(registry); + + OwningOpRef module = + parseSourceString(moduleStr, &context); + ASSERT_TRUE(!!module); + + // Create a ROCDL target. + ROCDL::ROCDLTargetAttr target = ROCDL::ROCDLTargetAttr::get(&context); + + // Serialize the module. + auto serializer = dyn_cast(target); + ASSERT_TRUE(!!serializer); + gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Binary); + for (auto gpuModule : (*module).getBody()->getOps()) { + std::optional> object = + serializer.serializeToObject(gpuModule, options); + // Check that the serializer was successful. + ASSERT_TRUE(object != std::nullopt); + ASSERT_FALSE(object->empty()); + if (!object) + continue; + // Get the metadata. + gpu::KernelTableAttr metadata = + ROCDL::getAMDHSAKernelsMetadata(gpuModule, *object); + ASSERT_TRUE(metadata != nullptr); + // There should be only a single kernel. + ASSERT_TRUE(metadata.size() == 1); + // Test the `ROCDLObjectMDAttr` iterators. + for (auto [name, kernel] : metadata) { + ASSERT_TRUE(name.getValue() == "rocdl_kernel"); + // Check that the ELF metadata is present. + ASSERT_TRUE(kernel.getMetadata() != nullptr); + // Verify that `sgpr_count` is present and it is an integer attribute. + ASSERT_TRUE(kernel.getMDAttr("sgpr_count") != nullptr); + // Verify that `vgpr_count` is present and it is an integer attribute. + ASSERT_TRUE(kernel.getMDAttr("vgpr_count") != nullptr); + } + } +} From e3e2b9bbd8b5d3fcbf5dfe51168d05aed7f3efc2 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Wed, 12 Jun 2024 21:11:57 +0000 Subject: [PATCH 02/15] fix python bindings --- mlir/include/mlir-c/Dialect/GPU.h | 6 ++++++ mlir/lib/Bindings/Python/DialectGPU.cpp | 21 ++++++++++++++++----- mlir/lib/CAPI/Dialect/GPU.cpp | 12 ++++++++++++ 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/mlir/include/mlir-c/Dialect/GPU.h b/mlir/include/mlir-c/Dialect/GPU.h index 0c2a603ed9b89..d6d8eee639de9 100644 --- a/mlir/include/mlir-c/Dialect/GPU.h +++ b/mlir/include/mlir-c/Dialect/GPU.h @@ -53,6 +53,12 @@ mlirGPUObjectAttrHasProperties(MlirAttribute mlirObjectAttr); MLIR_CAPI_EXPORTED MlirAttribute mlirGPUObjectAttrGetProperties(MlirAttribute mlirObjectAttr); +MLIR_CAPI_EXPORTED bool +mlirGPUObjectAttrHasKernels(MlirAttribute mlirObjectAttr); + +MLIR_CAPI_EXPORTED MlirAttribute +mlirGPUObjectAttrGetKernels(MlirAttribute mlirObjectAttr); + #ifdef __cplusplus } #endif diff --git a/mlir/lib/Bindings/Python/DialectGPU.cpp b/mlir/lib/Bindings/Python/DialectGPU.cpp index a9e339b50dabc..d4df5d5b58130 100644 --- a/mlir/lib/Bindings/Python/DialectGPU.cpp +++ b/mlir/lib/Bindings/Python/DialectGPU.cpp @@ -48,17 +48,21 @@ PYBIND11_MODULE(_mlirDialectsGPU, m) { .def_classmethod( "get", [](py::object cls, MlirAttribute target, uint32_t format, - py::bytes object, std::optional mlirObjectProps) { + py::bytes object, std::optional mlirObjectProps, + std::optional mlirKernelsAttr) { py::buffer_info info(py::buffer(object).request()); MlirStringRef objectStrRef = mlirStringRefCreate(static_cast(info.ptr), info.size); return cls(mlirGPUObjectAttrGet( mlirAttributeGetContext(target), target, format, objectStrRef, mlirObjectProps.has_value() ? *mlirObjectProps + : MlirAttribute{nullptr}, + mlirKernelsAttr.has_value() ? *mlirKernelsAttr : MlirAttribute{nullptr})); }, "cls"_a, "target"_a, "format"_a, "object"_a, - "properties"_a = py::none(), "Gets a gpu.object from parameters.") + "properties"_a = py::none(), "kernels"_a = py::none(), + "Gets a gpu.object from parameters.") .def_property_readonly( "target", [](MlirAttribute self) { return mlirGPUObjectAttrGetTarget(self); }) @@ -71,9 +75,16 @@ PYBIND11_MODULE(_mlirDialectsGPU, m) { MlirStringRef stringRef = mlirGPUObjectAttrGetObject(self); return py::bytes(stringRef.data, stringRef.length); }) - .def_property_readonly("properties", [](MlirAttribute self) { - if (mlirGPUObjectAttrHasProperties(self)) - return py::cast(mlirGPUObjectAttrGetProperties(self)); + .def_property_readonly("properties", + [](MlirAttribute self) { + if (mlirGPUObjectAttrHasProperties(self)) + return py::cast( + mlirGPUObjectAttrGetProperties(self)); + return py::none().cast(); + }) + .def_property_readonly("kernels", [](MlirAttribute self) { + if (mlirGPUObjectAttrHasKernels(self)) + return py::cast(mlirGPUObjectAttrGetKernels(self)); return py::none().cast(); }); } diff --git a/mlir/lib/CAPI/Dialect/GPU.cpp b/mlir/lib/CAPI/Dialect/GPU.cpp index ffe60658cb2ce..61b3c23f21bed 100644 --- a/mlir/lib/CAPI/Dialect/GPU.cpp +++ b/mlir/lib/CAPI/Dialect/GPU.cpp @@ -82,3 +82,15 @@ MlirAttribute mlirGPUObjectAttrGetProperties(MlirAttribute mlirObjectAttr) { llvm::cast(unwrap(mlirObjectAttr)); return wrap(objectAttr.getProperties()); } + +bool mlirGPUObjectAttrHasKernels(MlirAttribute mlirObjectAttr) { + gpu::ObjectAttr objectAttr = + llvm::cast(unwrap(mlirObjectAttr)); + return objectAttr.getKernels() != nullptr; +} + +MlirAttribute mlirGPUObjectAttrGetKernels(MlirAttribute mlirObjectAttr) { + gpu::ObjectAttr objectAttr = + llvm::cast(unwrap(mlirObjectAttr)); + return wrap(objectAttr.getKernels()); +} From f105f205778678645bb82f1c595ae2a453b81c33 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Thu, 13 Jun 2024 15:03:06 +0000 Subject: [PATCH 03/15] addressed reviewer comments --- .../mlir/Dialect/GPU/IR/CompilationAttrs.td | 96 ++++++++++++------- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 54 ++++++++--- mlir/test/Dialect/GPU/ops.mlir | 6 +- .../Target/LLVM/SerializeROCDLTarget.cpp | 4 +- 4 files changed, 108 insertions(+), 52 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td index f4037b55c85b4..0088b729ab8a7 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td +++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td @@ -22,72 +22,83 @@ include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" def GPU_KernelAttr : GPU_Attr<"Kernel", "kernel"> { let description = [{ - GPU attribute for storing metadata related to a compiled kernel. It - contains the attribute dictionary of the LLVM function used to generate the - kernel, as well as an optional dictionary for additional metadata, like - occupancy information. + GPU attribute for storing metadata related to a compiled kernel. The + attribute contains the name and function type of the kernel. + + The attribute also contains optional parameters for storing the arguments + attributes as well as a dictionary for additional metadata, like occupancy + information or other function attributes. + + Note: The `arg_attrs` parameter is expected to follow all the constraints + imposed by the `mlir::FunctionOpInterface` interface. Examples: ```mlir - #gpu.kernel<{sym_name = "test_fusion__part_0", ...}, - metadata = {reg_count = 255, ...}> + #gpu.kernel<@kernel1, (i32) -> (), arg_attrs = [...], metadata = {reg_count = 255, ...}> + #gpu.kernel<@kernel2, (i32, f64) -> ()> ``` }]; let parameters = (ins - "DictionaryAttr":$func_attrs, + "StringAttr":$name, + "Type":$function_type, + OptionalParameter<"ArrayAttr", "arguments attributes">:$arg_attrs, OptionalParameter<"DictionaryAttr", "metadata dictionary">:$metadata ); let assemblyFormat = [{ - `<` $func_attrs (`,` `metadata` `=` $metadata^ )? `>` + `<` $name `,` $function_type (`,` struct($arg_attrs, $metadata)^)? `>` }]; let builders = [ - AttrBuilderWithInferredContext<(ins "DictionaryAttr":$funcAttrs, - CArg<"DictionaryAttr", - "nullptr">:$metadata), [{ - assert(funcAttrs && "invalid function attributes dictionary"); - return $_get(funcAttrs.getContext(), funcAttrs, metadata); + AttrBuilderWithInferredContext<(ins "StringAttr":$name, + "Type":$functionType, + CArg<"ArrayAttr", "nullptr">:$argAttrs, + CArg<"DictionaryAttr", + "nullptr">:$metadata), [{ + assert(name && "invalid name"); + return $_get(name.getContext(), name, functionType, argAttrs, metadata); }]>, - AttrBuilderWithInferredContext<(ins "Operation*":$kernel, + AttrBuilderWithInferredContext<(ins "FunctionOpInterface":$kernel, CArg<"DictionaryAttr", "nullptr">:$metadata)> ]; + let genVerifyDecl = 1; let extraClassDeclaration = [{ - /// Returns the function attribute corresponding to key or nullptr if missing. + /// Returns the metadata attribute corresponding to `key` or `nullptr` + /// if missing. Attribute getAttr(StringRef key) const { - return getFuncAttrs().get(key); + auto attrs = getMetadata(); + return attrs ? attrs.get(key) : nullptr; } template ConcreteAttr getAttr(StringRef key) const { return llvm::dyn_cast_or_null(getAttr(key)); } - Attribute getAttr(StringAttr key) const; + Attribute getAttr(StringAttr key) const { + auto attrs = getMetadata(); + return attrs ? attrs.get(key) : nullptr; + } template ConcreteAttr getAttr(StringAttr key) const { return llvm::dyn_cast_or_null(getAttr(key)); } - /// Returns the name of the kernel. - StringAttr getName() const { - return getAttr("sym_name"); + /// Returns the attribute dictionary at position `index`. + DictionaryAttr getArgAttrDict(unsigned index) { + auto argArray = getArgAttrs(); + return argArray ? llvm::cast(argArray[index]) : nullptr; } - /// Returns the metadta attribute corresponding to key or nullptr if missing. - Attribute getMDAttr(StringRef key) const { - if (DictionaryAttr attrs = getMetadata()) - return attrs.get(key); - return nullptr; + /// Return the specified attribute, if present, for the argument at 'index', + /// null otherwise. + Attribute getArgAttr(unsigned index, StringAttr name) { + auto argDict = getArgAttrDict(index); + return argDict ? argDict.get(name) : nullptr; } - template - ConcreteAttr getMDAttr(StringRef key) const { - return llvm::dyn_cast_or_null(getMDAttr(key)); - } - Attribute getMDAttr(StringAttr key) const; - template - ConcreteAttr getMDAttr(StringAttr key) const { - return llvm::dyn_cast_or_null(getMDAttr(key)); + Attribute getArgAttr(unsigned index, StringRef name) { + auto argDict = getArgAttrDict(index); + return argDict ? argDict.get(name) : nullptr; } - /// Helper function for appending metadata to a kernel attribute. + /// Returns a new KernelAttr that contains `attrs` in the metadata dictionary. KernelAttr appendMetadata(ArrayRef attrs) const; }]; } @@ -143,6 +154,14 @@ def GPU_KernelTableAttr : GPU_Attr<"KernelTable", "kernel_table"> { size_t size() const { return getKernelTable().size(); } + + /// Returns the kernel with name `key` or `nullptr` if not present. + KernelAttr lookup(StringRef key) const { + return getKernelTable().getAs(key); + } + KernelAttr lookup(StringAttr key) const { + return getKernelTable().getAs(key); + } }]; } @@ -166,8 +185,9 @@ def GPU_CompilationTargetEnum : GPU_I32Enum< def GPU_ObjectAttr : GPU_Attr<"Object", "object"> { let description = [{ A GPU object attribute glues together a GPU target, the object kind, a - binary string with the object, and the object properties, encapsulating how - the object was generated and its properties with the object itself. + binary string with the object, the object properties, and kernel metadata, + encapsulating how the object was generated and its properties with the + object itself. There are four object formats: 1. `Offload`: represents generic objects not described by the other three @@ -185,6 +205,10 @@ def GPU_ObjectAttr : GPU_Attr<"Object", "object"> { Object properties are specified through the `properties` dictionary attribute and can be used to define additional information. + + Kernel metadata is specified through the `kernels` parameter, and can be + used to specify additional information on a kernel by kernel basis. + The target attribute must implement or promise the `TargetAttrInterface` interface. diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index de69465c2ec6f..cf415b284e623 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -2182,17 +2182,45 @@ LogicalResult gpu::DynamicSharedMemoryOp::verify() { // GPU KernelAttr //===----------------------------------------------------------------------===// -KernelAttr KernelAttr::get(Operation *kernelOp, DictionaryAttr metadata) { - assert(kernelOp && "invalid kernel"); - return get(kernelOp->getAttrDictionary(), metadata); +KernelAttr KernelAttr::get(FunctionOpInterface kernel, + DictionaryAttr metadata) { + assert(kernel && "invalid kernel"); + return get(kernel.getNameAttr(), kernel.getFunctionType(), + kernel.getAllArgAttrs(), metadata); +} + +KernelAttr KernelAttr::getChecked(function_ref emitError, + FunctionOpInterface kernel, + DictionaryAttr metadata) { + assert(kernel && "invalid kernel"); + return getChecked(emitError, kernel.getNameAttr(), kernel.getFunctionType(), + kernel.getAllArgAttrs(), metadata); } KernelAttr KernelAttr::appendMetadata(ArrayRef attrs) const { if (attrs.empty()) return *this; - NamedAttrList attrList(attrs); - attrList.append(getMetadata()); - return KernelAttr::get(getFuncAttrs(), attrList.getDictionary(getContext())); + NamedAttrList attrList; + if (auto dict = getMetadata()) + attrList.append(dict); + attrList.append(attrs); + return KernelAttr::get(getName(), getFunctionType(), getArgAttrs(), + attrList.getDictionary(getContext())); +} + +LogicalResult KernelAttr::verify(function_ref emitError, + StringAttr name, Type functionType, + ArrayAttr argAttrs, DictionaryAttr metadata) { + if (name.empty()) + return emitError() << "the kernel name can't be empty"; + if (argAttrs) { + if (llvm::any_of(argAttrs, [](Attribute attr) { + return !llvm::isa(attr); + })) + return emitError() + << "all attributes in the array must be a dictionary attribute"; + } + return success(); } //===----------------------------------------------------------------------===// @@ -2204,11 +2232,15 @@ KernelTableAttr::verify(function_ref emitError, DictionaryAttr dict) { if (!dict) return emitError() << "table cannot be null"; - if (llvm::any_of(dict, [](NamedAttribute attr) { - return !llvm::isa(attr.getValue()); - })) - return emitError() - << "all the dictionary values must be `#gpu.kernel` attributes"; + for (NamedAttribute attr : dict) { + auto kernel = llvm::dyn_cast(attr.getValue()); + if (!kernel) + return emitError() + << "all the dictionary values must be `#gpu.kernel` attributes"; + if (kernel.getName() != attr.getName()) + return emitError() << "expected kernel to be named `" << attr.getName() + << "` but got `" << kernel.getName() << "`"; + } return success(); } diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir index 692ef2a5e3bef..d902cf3f41e08 100644 --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -444,6 +444,6 @@ gpu.module @module_with_offload_handler <#gpu.select_object<0>> [#nvvm.target] { gpu.binary @binary [#gpu.object<#rocdl.target, kernels = #gpu.kernel_table<{ - kernel0 = #gpu.kernel<{sym_name = "kernel0"}, metadata = {sgpr_count = 255}>, - kernel1 = #gpu.kernel<{sym_name = "kernel1"}> - }> , bin = "BLOB">] + kernel0 = #gpu.kernel<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>, + kernel1 = #gpu.kernel<"kernel1", (i32) -> (), arg_attrs = [{llvm.read_only}]> + }>, bin = "BLOB">] diff --git a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp index 3d5c84efb6f4f..a75cb05468250 100644 --- a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp +++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp @@ -197,9 +197,9 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(GetELFMetadata)) { // Check that the ELF metadata is present. ASSERT_TRUE(kernel.getMetadata() != nullptr); // Verify that `sgpr_count` is present and it is an integer attribute. - ASSERT_TRUE(kernel.getMDAttr("sgpr_count") != nullptr); + ASSERT_TRUE(kernel.getAttr("sgpr_count") != nullptr); // Verify that `vgpr_count` is present and it is an integer attribute. - ASSERT_TRUE(kernel.getMDAttr("vgpr_count") != nullptr); + ASSERT_TRUE(kernel.getAttr("vgpr_count") != nullptr); } } } From 1b51f5502a8d71cc441ebf5333c73286eb93d178 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Mon, 17 Jun 2024 16:41:29 +0000 Subject: [PATCH 04/15] expose function retrieving the ELF metadata dict --- mlir/include/mlir/Target/LLVM/ROCDL/Utils.h | 12 ++++++-- mlir/lib/Target/LLVM/ROCDL/Utils.cpp | 28 +++++++++++-------- .../Target/LLVM/SerializeROCDLTarget.cpp | 2 +- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h index 904d60a2a75db..3d2174c144815 100644 --- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h +++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h @@ -109,11 +109,19 @@ class SerializeGPUModuleBase : public LLVM::ModuleToObject { AMDGCNLibraries deviceLibs = AMDGCNLibraries::None; }; +/// Returns a map containing the `amdhsa.kernels` ELF metadata for each of the +/// kernels in the binary, or `std::nullopt` if the metadata couldn't be +/// retrieved. The map associates the name of the kernel with the list of named +/// attributes found in `amdhsa.kernels`. For more information on the ELF +/// metadata see: https://llvm.org/docs/AMDGPUUsage.html#amdhsa +std::optional> +getAMDHSAKernelsELFMetadata(Builder &builder, ArrayRef elfData); + /// Returns a `#gpu.kernel_table` containing kernel metadata for each of the /// kernels in `gpuModule`. If `elfData` is valid, then the `amdhsa.kernels` ELF /// metadata will be added to the `#gpu.kernel_table`. -gpu::KernelTableAttr getAMDHSAKernelsMetadata(Operation *gpuModule, - ArrayRef elfData = {}); +gpu::KernelTableAttr getKernelMetadata(Operation *gpuModule, + ArrayRef elfData = {}); } // namespace ROCDL } // namespace mlir diff --git a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp index 5029293bae2a6..c07ea8eebaf2e 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp @@ -166,20 +166,21 @@ static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node) { } /// The following function should succeed for Code object V3 and above. -static llvm::StringMap getELFMetadata(Builder &builder, - ArrayRef elfData) { +std::optional> +mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder, + ArrayRef elfData) { std::optional metadata = getAMDHSANote(elfData); if (!metadata) - return {}; - llvm::StringMap kernelMD; + return std::nullopt; + DenseMap kernelMD; llvm::msgpack::DocNode &root = (metadata)->getRoot(); // Fail if `root` is not a map -it should be for AMD Obj Ver 3. if (!root.isMap()) - return kernelMD; + return std::nullopt; auto &kernels = root.getMap()["amdhsa.kernels"]; // Fail if `amdhsa.kernels` is not an array. if (!kernels.isArray()) - return kernelMD; + return std::nullopt; // Convert each of the kernels. for (auto &kernel : kernels.getArray()) { if (!kernel.isMap()) @@ -202,24 +203,27 @@ static llvm::StringMap getELFMetadata(Builder &builder, attrList.append(key, attr); } if (!attrList.empty()) - kernelMD[name.getString()] = builder.getDictionaryAttr(attrList); + kernelMD[builder.getStringAttr(name.getString())] = std::move(attrList); } return kernelMD; } -gpu::KernelTableAttr -mlir::ROCDL::getAMDHSAKernelsMetadata(Operation *gpuModule, - ArrayRef elfData) { +gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule, + ArrayRef elfData) { auto module = cast(gpuModule); Builder builder(module.getContext()); NamedAttrList moduleAttrs; - llvm::StringMap mdMap = getELFMetadata(builder, elfData); + std::optional> mdMapOrNull = + getAMDHSAKernelsELFMetadata(builder, elfData); for (auto funcOp : module.getBody()->getOps()) { if (!funcOp->getDiscardableAttr("rocdl.kernel")) continue; moduleAttrs.append( funcOp.getName(), - gpu::KernelAttr::get(funcOp, mdMap.lookup(funcOp.getName()))); + gpu::KernelAttr::get( + funcOp, mdMapOrNull ? builder.getDictionaryAttr( + mdMapOrNull->lookup(funcOp.getNameAttr())) + : nullptr)); } return gpu::KernelTableAttr::get( moduleAttrs.getDictionary(module.getContext())); diff --git a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp index a75cb05468250..c5ad5a045d2f7 100644 --- a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp +++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp @@ -187,7 +187,7 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(GetELFMetadata)) { continue; // Get the metadata. gpu::KernelTableAttr metadata = - ROCDL::getAMDHSAKernelsMetadata(gpuModule, *object); + ROCDL::getKernelMetadata(gpuModule, *object); ASSERT_TRUE(metadata != nullptr); // There should be only a single kernel. ASSERT_TRUE(metadata.size() == 1); From 36c2681e7282db11189e52a3eae1ce8b43f0a9ab Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Tue, 30 Jul 2024 15:30:31 +0000 Subject: [PATCH 05/15] address reviewer comments --- mlir/include/mlir-c/Dialect/GPU.h | 8 +- .../mlir/Dialect/GPU/IR/CompilationAttrs.td | 59 +++++----- mlir/lib/CAPI/Dialect/GPU.cpp | 19 +++- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 63 ++++++++--- mlir/lib/Target/LLVM/ROCDL/Target.cpp | 11 +- mlir/lib/Target/LLVM/ROCDL/Utils.cpp | 101 +++++++++--------- mlir/test/Dialect/GPU/invalid.mlir | 12 +++ mlir/test/Dialect/GPU/ops.mlir | 27 ++++- .../Target/LLVM/SerializeROCDLTarget.cpp | 29 ++++- 9 files changed, 212 insertions(+), 117 deletions(-) diff --git a/mlir/include/mlir-c/Dialect/GPU.h b/mlir/include/mlir-c/Dialect/GPU.h index d6d8eee639de9..321c1122c3370 100644 --- a/mlir/include/mlir-c/Dialect/GPU.h +++ b/mlir/include/mlir-c/Dialect/GPU.h @@ -35,8 +35,12 @@ MLIR_CAPI_EXPORTED bool mlirAttributeIsAGPUObjectAttr(MlirAttribute attr); MLIR_CAPI_EXPORTED MlirAttribute mlirGPUObjectAttrGet(MlirContext mlirCtx, MlirAttribute target, uint32_t format, - MlirStringRef objectStrRef, MlirAttribute mlirObjectProps, - MlirAttribute mlirKernelsAttr); + MlirStringRef objectStrRef, MlirAttribute mlirObjectProps); + +MLIR_CAPI_EXPORTED MlirAttribute mlirGPUObjectAttrGetWithKernels( + MlirContext mlirCtx, MlirAttribute target, uint32_t format, + MlirStringRef objectStrRef, MlirAttribute mlirObjectProps, + MlirAttribute mlirKernelsAttr); MLIR_CAPI_EXPORTED MlirAttribute mlirGPUObjectAttrGetTarget(MlirAttribute mlirObjectAttr); diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td index 0088b729ab8a7..78740a1cb5e1d 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td +++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td @@ -62,10 +62,15 @@ def GPU_KernelAttr : GPU_Attr<"Kernel", "kernel"> { ]; let genVerifyDecl = 1; let extraClassDeclaration = [{ + /// Compare two kernels based on the name. + bool operator<(const KernelAttr& other) const { + return getName().getValue() < other.getName().getValue(); + } + /// Returns the metadata attribute corresponding to `key` or `nullptr` /// if missing. Attribute getAttr(StringRef key) const { - auto attrs = getMetadata(); + DictionaryAttr attrs = getMetadata(); return attrs ? attrs.get(key) : nullptr; } template @@ -73,7 +78,7 @@ def GPU_KernelAttr : GPU_Attr<"Kernel", "kernel"> { return llvm::dyn_cast_or_null(getAttr(key)); } Attribute getAttr(StringAttr key) const { - auto attrs = getMetadata(); + DictionaryAttr attrs = getMetadata(); return attrs ? attrs.get(key) : nullptr; } template @@ -83,18 +88,18 @@ def GPU_KernelAttr : GPU_Attr<"Kernel", "kernel"> { /// Returns the attribute dictionary at position `index`. DictionaryAttr getArgAttrDict(unsigned index) { - auto argArray = getArgAttrs(); + ArrayAttr argArray = getArgAttrs(); return argArray ? llvm::cast(argArray[index]) : nullptr; } /// Return the specified attribute, if present, for the argument at 'index', /// null otherwise. Attribute getArgAttr(unsigned index, StringAttr name) { - auto argDict = getArgAttrDict(index); + DictionaryAttr argDict = getArgAttrDict(index); return argDict ? argDict.get(name) : nullptr; } Attribute getArgAttr(unsigned index, StringRef name) { - auto argDict = getArgAttrDict(index); + DictionaryAttr argDict = getArgAttrDict(index); return argDict ? argDict.get(name) : nullptr; } @@ -114,54 +119,38 @@ def GPU_KernelTableAttr : GPU_Attr<"KernelTable", "kernel_table"> { Examples: ```mlir - #gpu.kernel_table<{kernel0 = #gpu.kernel<...>}> + #gpu.kernel_table<[#gpu.kernel]> ``` }]; let parameters = (ins - "DictionaryAttr":$kernel_table + OptionalArrayRefParameter<"KernelAttr", "array of kernels">:$kernel_table ); let assemblyFormat = [{ - `<` $kernel_table `>` + `<` (`[` qualified($kernel_table)^ `]`)? `>` }]; let builders = [ - AttrBuilderWithInferredContext<(ins "DictionaryAttr":$kernel_table), [{ - assert(kernel_table && "invalid kernel table"); - return $_get(kernel_table.getContext(), kernel_table); - }]> + AttrBuilder<(ins "ArrayRef":$kernels, + CArg<"bool", "false">:$isSorted)> ]; let skipDefaultBuilders = 1; let genVerifyDecl = 1; let extraClassDeclaration = [{ - /// Helper iterator class for traversing the kernel table. - struct KernelIterator - : llvm::mapped_iterator_base::iterator, - std::pair> { - using llvm::mapped_iterator_base< - KernelIterator, llvm::ArrayRef::iterator, - std::pair>::mapped_iterator_base; - /// Map the iterator to the kernel name and a KernelAttribute. - std::pair mapElement(NamedAttribute attr) const { - return {attr.getName(), llvm::cast(attr.getValue())}; - } - }; - auto begin() const { - return KernelIterator(getKernelTable().begin()); + llvm::ArrayRef::iterator begin() const { + return getKernelTable().begin(); } - auto end() const { - return KernelIterator(getKernelTable().end()); + llvm::ArrayRef::iterator end() const { + return getKernelTable().end(); } size_t size() const { return getKernelTable().size(); } + bool empty() const { + return getKernelTable().empty(); + } /// Returns the kernel with name `key` or `nullptr` if not present. - KernelAttr lookup(StringRef key) const { - return getKernelTable().getAs(key); - } - KernelAttr lookup(StringAttr key) const { - return getKernelTable().getAs(key); - } + KernelAttr lookup(StringRef key) const; + KernelAttr lookup(StringAttr key) const; }]; } diff --git a/mlir/lib/CAPI/Dialect/GPU.cpp b/mlir/lib/CAPI/Dialect/GPU.cpp index 61b3c23f21bed..e4796ed1499ea 100644 --- a/mlir/lib/CAPI/Dialect/GPU.cpp +++ b/mlir/lib/CAPI/Dialect/GPU.cpp @@ -37,8 +37,23 @@ bool mlirAttributeIsAGPUObjectAttr(MlirAttribute attr) { MlirAttribute mlirGPUObjectAttrGet(MlirContext mlirCtx, MlirAttribute target, uint32_t format, MlirStringRef objectStrRef, - MlirAttribute mlirObjectProps, - MlirAttribute mlirKernelsAttr) { + MlirAttribute mlirObjectProps) { + MLIRContext *ctx = unwrap(mlirCtx); + llvm::StringRef object = unwrap(objectStrRef); + DictionaryAttr objectProps; + if (mlirObjectProps.ptr != nullptr) + objectProps = llvm::cast(unwrap(mlirObjectProps)); + return wrap(gpu::ObjectAttr::get( + ctx, unwrap(target), static_cast(format), + StringAttr::get(ctx, object), objectProps, nullptr)); +} + +MlirAttribute mlirGPUObjectAttrGetWithKernels(MlirContext mlirCtx, + MlirAttribute target, + uint32_t format, + MlirStringRef objectStrRef, + MlirAttribute mlirObjectProps, + MlirAttribute mlirKernelsAttr) { MLIRContext *ctx = unwrap(mlirCtx); llvm::StringRef object = unwrap(objectStrRef); DictionaryAttr objectProps; diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index cf415b284e623..78d95af1e3253 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -2201,7 +2201,7 @@ KernelAttr KernelAttr::appendMetadata(ArrayRef attrs) const { if (attrs.empty()) return *this; NamedAttrList attrList; - if (auto dict = getMetadata()) + if (DictionaryAttr dict = getMetadata()) attrList.append(dict); attrList.append(attrs); return KernelAttr::get(getName(), getFunctionType(), getArgAttrs(), @@ -2227,23 +2227,62 @@ LogicalResult KernelAttr::verify(function_ref emitError, // GPU KernelTableAttr //===----------------------------------------------------------------------===// +KernelTableAttr KernelTableAttr::get(MLIRContext *context, + ArrayRef kernels, + bool isSorted) { + // Note that `is_sorted` is always only invoked once even with assertions ON. + assert((!isSorted || llvm::is_sorted(kernels)) && + "expected a sorted kernel array"); + // Immediately return the attribute if the array is sorted. + if (isSorted || llvm::is_sorted(kernels)) + return Base::get(context, kernels); + // Sort the array. + SmallVector kernelsTmp(kernels); + llvm::array_pod_sort(kernelsTmp.begin(), kernelsTmp.end()); + return Base::get(context, kernelsTmp); +} + +KernelTableAttr +KernelTableAttr::getChecked(function_ref emitError, + MLIRContext *context, ArrayRef kernels, + bool isSorted) { + // Note that `is_sorted` is always only invoked once even with assertions ON. + assert((!isSorted || llvm::is_sorted(kernels)) && + "expected a sorted kernel array"); + // Immediately return the attribute if the array is sorted. + if (isSorted || llvm::is_sorted(kernels)) + return Base::getChecked(emitError, context, kernels); + // Sort the array. + SmallVector kernelsTmp(kernels); + llvm::array_pod_sort(kernelsTmp.begin(), kernelsTmp.end()); + return Base::getChecked(emitError, context, kernelsTmp); +} + LogicalResult KernelTableAttr::verify(function_ref emitError, - DictionaryAttr dict) { - if (!dict) - return emitError() << "table cannot be null"; - for (NamedAttribute attr : dict) { - auto kernel = llvm::dyn_cast(attr.getValue()); - if (!kernel) - return emitError() - << "all the dictionary values must be `#gpu.kernel` attributes"; - if (kernel.getName() != attr.getName()) - return emitError() << "expected kernel to be named `" << attr.getName() - << "` but got `" << kernel.getName() << "`"; + ArrayRef kernels) { + if (kernels.size() < 2) + return success(); + // Check that the kernels are uniquely named. + if (std::adjacent_find(kernels.begin(), kernels.end(), + [](KernelAttr l, KernelAttr r) { + return l.getName() == r.getName(); + }) != kernels.end()) { + return emitError() << "expected all kernels to be uniquely named"; } return success(); } +KernelAttr KernelTableAttr::lookup(StringRef key) const { + auto it = impl::findAttrSorted(begin(), end(), key); + return it.second ? *it.first : KernelAttr(); +} + +KernelAttr KernelTableAttr::lookup(StringAttr key) const { + auto it = impl::findAttrSorted(begin(), end(), key); + return it.second ? *it.first : KernelAttr(); +} + //===----------------------------------------------------------------------===// // GPU target options //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index 47dfb3d63bd4a..5b0e81d16a07b 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -508,13 +508,10 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module, // supported. if (format > gpu::CompilationTarget::Binary) format = gpu::CompilationTarget::Binary; - DictionaryAttr properties{}; Builder builder(attribute.getContext()); - return builder.getAttr( - attribute, - format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary - : format, - builder.getStringAttr(StringRef(object.data(), object.size())), - properties, nullptr); + StringAttr objectStr = + builder.getStringAttr(StringRef(object.data(), object.size())); + return builder.getAttr(attribute, format, objectStr, + properties, nullptr); } diff --git a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp index c07ea8eebaf2e..4829950a1818c 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp @@ -29,17 +29,19 @@ using namespace mlir::ROCDL; /// `amdhsa.kernels`: /// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3-metadata template -static std::optional +static std::unique_ptr getAMDHSANote(llvm::object::ELFObjectFile &elfObj) { using namespace llvm; using namespace llvm::object; using namespace llvm::ELF; const ELFFile &elf = elfObj.getELFFile(); - auto secOrErr = elf.sections(); - if (!secOrErr) - return std::nullopt; + Expected secOrErr = elf.sections(); + if (!secOrErr) { + consumeError(secOrErr.takeError()); + return nullptr; + } ArrayRef sections = *secOrErr; - for (auto section : sections) { + for (const typename ELFT::Shdr §ion : sections) { if (section.sh_type != ELF::SHT_NOTE) continue; size_t align = std::max(static_cast(section.sh_addralign), 4u); @@ -54,45 +56,45 @@ getAMDHSANote(llvm::object::ELFObjectFile &elfObj) { ArrayRef desc = note.getDesc(align); StringRef msgPackString = StringRef(reinterpret_cast(desc.data()), desc.size()); - msgpack::Document msgPackDoc; - if (!msgPackDoc.readFromBlob(msgPackString, /*Multi=*/false)) - return std::nullopt; - if (msgPackDoc.getRoot().isScalar()) - return std::nullopt; - return std::optional(std::move(msgPackDoc)); + std::unique_ptr msgPackDoc( + new llvm::msgpack::Document()); + if (!msgPackDoc->readFromBlob(msgPackString, /*Multi=*/false)) + return nullptr; + if (msgPackDoc->getRoot().isScalar()) + return nullptr; + return msgPackDoc; } } - return std::nullopt; + return nullptr; } -/// Return the `amdhsa.kernels` metadata in the ELF object or std::nullopt on +/// Return the `amdhsa.kernels` metadata in the ELF object or nullptr on /// failure. This is a helper function that casts a generic `ObjectFile` to the /// appropiate `ELFObjectFile`. -static std::optional +static std::unique_ptr getAMDHSANote(ArrayRef elfData) { using namespace llvm; using namespace llvm::object; if (elfData.empty()) - return std::nullopt; + return nullptr; MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()), "buffer"); Expected> objOrErr = ObjectFile::createELFObjectFile(buffer); if (!objOrErr || !objOrErr.get()) { // Drop the error. llvm::consumeError(objOrErr.takeError()); - return std::nullopt; + return nullptr; } ObjectFile &elf = *(objOrErr.get()); - std::optional metadata; if (auto *obj = dyn_cast(&elf)) - metadata = getAMDHSANote(*obj); + return getAMDHSANote(*obj); else if (auto *obj = dyn_cast(&elf)) - metadata = getAMDHSANote(*obj); + return getAMDHSANote(*obj); else if (auto *obj = dyn_cast(&elf)) - metadata = getAMDHSANote(*obj); + return getAMDHSANote(*obj); else if (auto *obj = dyn_cast(&elf)) - metadata = getAMDHSANote(*obj); - return metadata; + return getAMDHSANote(*obj); + return nullptr; } /// Utility functions for converting `llvm::msgpack::DocNode` nodes. @@ -100,11 +102,11 @@ static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node); static Attribute convertNode(Builder &builder, llvm::msgpack::MapDocNode &node) { NamedAttrList attrs; - for (auto kv : node) { - if (!kv.first.isString()) + for (auto &[keyNode, valueNode] : node) { + if (!keyNode.isString()) continue; - if (Attribute attr = convertNode(builder, kv.second)) { - auto key = kv.first.getString(); + StringRef key = keyNode.getString(); + if (Attribute attr = convertNode(builder, valueNode)) { key.consume_front("."); key.consume_back("."); attrs.append(key, attr); @@ -125,7 +127,7 @@ static Attribute convertNode(Builder &builder, })) { SmallVector values; for (llvm::msgpack::DocNode &n : node) { - auto kind = n.getKind(); + llvm::msgpack::Type kind = n.getKind(); if (kind == NodeKind::Int) values.push_back(n.getInt()); else if (kind == NodeKind::UInt) @@ -169,41 +171,43 @@ static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node) { std::optional> mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder, ArrayRef elfData) { - std::optional metadata = getAMDHSANote(elfData); + using namespace llvm::msgpack; + std::unique_ptr metadata = getAMDHSANote(elfData); if (!metadata) return std::nullopt; DenseMap kernelMD; - llvm::msgpack::DocNode &root = (metadata)->getRoot(); - // Fail if `root` is not a map -it should be for AMD Obj Ver 3. - if (!root.isMap()) + DocNode &rootNode = (metadata)->getRoot(); + // Fail if `rootNode` is not a map -it should be for AMD Obj Ver 3. + if (!rootNode.isMap()) return std::nullopt; - auto &kernels = root.getMap()["amdhsa.kernels"]; + DocNode &kernels = rootNode.getMap()["amdhsa.kernels"]; // Fail if `amdhsa.kernels` is not an array. if (!kernels.isArray()) return std::nullopt; // Convert each of the kernels. - for (auto &kernel : kernels.getArray()) { + for (DocNode &kernel : kernels.getArray()) { if (!kernel.isMap()) continue; - auto &kernelMap = kernel.getMap(); - auto &name = kernelMap[".name"]; - if (!name.isString()) + MapDocNode &kernelMap = kernel.getMap(); + DocNode &nameNode = kernelMap[".name"]; + if (!nameNode.isString()) continue; + StringRef name = nameNode.getString(); NamedAttrList attrList; // Convert the kernel properties. - for (auto kv : kernelMap) { - if (!kv.first.isString()) + for (auto &[keyNode, valueNode] : kernelMap) { + if (!keyNode.isString()) continue; - StringRef key = kv.first.getString(); + StringRef key = keyNode.getString(); key.consume_front("."); key.consume_back("."); if (key == "name") continue; - if (Attribute attr = convertNode(builder, kv.second)) + if (Attribute attr = convertNode(builder, valueNode)) attrList.append(key, attr); } if (!attrList.empty()) - kernelMD[builder.getStringAttr(name.getString())] = std::move(attrList); + kernelMD[builder.getStringAttr(name)] = std::move(attrList); } return kernelMD; } @@ -212,19 +216,16 @@ gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule, ArrayRef elfData) { auto module = cast(gpuModule); Builder builder(module.getContext()); - NamedAttrList moduleAttrs; + SmallVector kernels; std::optional> mdMapOrNull = getAMDHSAKernelsELFMetadata(builder, elfData); for (auto funcOp : module.getBody()->getOps()) { if (!funcOp->getDiscardableAttr("rocdl.kernel")) continue; - moduleAttrs.append( - funcOp.getName(), - gpu::KernelAttr::get( - funcOp, mdMapOrNull ? builder.getDictionaryAttr( - mdMapOrNull->lookup(funcOp.getNameAttr())) - : nullptr)); + kernels.push_back(gpu::KernelAttr::get( + funcOp, mdMapOrNull ? builder.getDictionaryAttr( + mdMapOrNull->lookup(funcOp.getNameAttr())) + : nullptr)); } - return gpu::KernelTableAttr::get( - moduleAttrs.getDictionary(module.getContext())); + return gpu::KernelTableAttr::get(gpuModule->getContext(), kernels); } diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir index 20c1c4cf8a2d0..8c0e0f1a87fd7 100644 --- a/mlir/test/Dialect/GPU/invalid.mlir +++ b/mlir/test/Dialect/GPU/invalid.mlir @@ -848,3 +848,15 @@ module attributes {gpu.container_module} { gpu.module @kernel <> { } } + +// ----- + +gpu.binary @binary [#gpu.object<#rocdl.target, + // expected-error@+1{{expected all kernels to be uniquely named}} + kernels = #gpu.kernel_table<[ + #gpu.kernel<"kernel", (i32) -> ()>, + #gpu.kernel<"kernel", (i32, f32) -> (), metadata = {sgpr_count = 255}> + // expected-error@below{{failed to parse GPU_ObjectAttr parameter 'kernels' which is to be a `KernelTableAttr`}} + ]>, + bin = "BLOB"> + ] diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir index d902cf3f41e08..f80dca7552623 100644 --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -442,8 +442,25 @@ gpu.module @module_with_two_target [#nvvm.target, #rocdl.target gpu.module @module_with_offload_handler <#gpu.select_object<0>> [#nvvm.target] { } - -gpu.binary @binary [#gpu.object<#rocdl.target, kernels = #gpu.kernel_table<{ - kernel0 = #gpu.kernel<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>, - kernel1 = #gpu.kernel<"kernel1", (i32) -> (), arg_attrs = [{llvm.read_only}]> - }>, bin = "BLOB">] +// Test kernel attributes +gpu.binary @kernel_attrs_1 [ + #gpu.object<#rocdl.target, + kernels = #gpu.kernel_table<[ + #gpu.kernel<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>, + #gpu.kernel<"kernel1", (i32) -> (), arg_attrs = [{llvm.read_only}]> + ]>, + bin = "BLOB"> + ] + +// Verify the kernels are sorted +// CHECK-LABEL: gpu.binary @kernel_attrs_2 +gpu.binary @kernel_attrs_2 [ + // CHECK: [#gpu.kernel<"a_kernel", () -> ()>, #gpu.kernel<"m_kernel", () -> ()>, #gpu.kernel<"z_kernel", () -> ()>] + #gpu.object<#rocdl.target, + kernels = #gpu.kernel_table<[ + #gpu.kernel<"z_kernel", () -> ()>, + #gpu.kernel<"m_kernel", () -> ()>, + #gpu.kernel<"a_kernel", () -> ()> + ]>, + bin = "BLOB"> + ] diff --git a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp index c5ad5a045d2f7..100c483c515de 100644 --- a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp +++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp @@ -166,6 +166,23 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(GetELFMetadata)) { MLIRContext context(registry); + // MLIR module used for the tests. + const std::string moduleStr = R"mlir( + gpu.module @rocdl_test { + llvm.func @rocdl_kernel_1(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} { + llvm.return + } + llvm.func @rocdl_kernel_0(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} { + llvm.return + } + llvm.func @rocdl_kernel_2(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} { + llvm.return + } + llvm.func @a_kernel(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} { + llvm.return + } + })mlir"; + OwningOpRef module = parseSourceString(moduleStr, &context); ASSERT_TRUE(!!module); @@ -189,11 +206,15 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(GetELFMetadata)) { gpu::KernelTableAttr metadata = ROCDL::getKernelMetadata(gpuModule, *object); ASSERT_TRUE(metadata != nullptr); - // There should be only a single kernel. - ASSERT_TRUE(metadata.size() == 1); + // There should be 4 kernels. + ASSERT_TRUE(metadata.size() == 4); + // Check that the lookup method returns finds the kernel. + ASSERT_TRUE(metadata.lookup("a_kernel") != nullptr); + ASSERT_TRUE(metadata.lookup("rocdl_kernel_0") != nullptr); + // Check that the kernel doesn't exist. + ASSERT_TRUE(metadata.lookup("not_existent_kernel") == nullptr); // Test the `ROCDLObjectMDAttr` iterators. - for (auto [name, kernel] : metadata) { - ASSERT_TRUE(name.getValue() == "rocdl_kernel"); + for (gpu::KernelAttr kernel : metadata) { // Check that the ELF metadata is present. ASSERT_TRUE(kernel.getMetadata() != nullptr); // Verify that `sgpr_count` is present and it is an integer attribute. From 246ba0f0612b78e8e56f1630deaf8845ed0b074e Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Tue, 30 Jul 2024 15:45:03 +0000 Subject: [PATCH 06/15] fix docs --- .../mlir/Dialect/GPU/IR/CompilationAttrs.td | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td index 78740a1cb5e1d..0cc51620d0b6a 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td +++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td @@ -114,12 +114,23 @@ def GPU_KernelAttr : GPU_Attr<"Kernel", "kernel"> { def GPU_KernelTableAttr : GPU_Attr<"KernelTable", "kernel_table"> { let description = [{ - GPU attribute representing a table of kernels metadata. All the attributes - in the dictionary must be of type `#gpu.kernel`. + GPU attribute representing a list of `#gpu.kernel` attributes. This + attribute supports searching kernels by name. All kernels in the table must + have an unique name. Examples: ```mlir - #gpu.kernel_table<[#gpu.kernel]> + // Empty table + #gpu.kernel_table<> + + // Table with a single kernel + #gpu.kernel_table<[#gpu.kernel () >]> + + // Table with multiple kernels. + #gpu.kernel_table<[ + #gpu.kernel<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>, + #gpu.kernel<"kernel1", (i32) -> ()> + ]> ``` }]; let parameters = (ins From 9814aec005986f43e44436ca5a50d8ea9f3edba1 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Tue, 30 Jul 2024 15:49:18 +0000 Subject: [PATCH 07/15] expand types --- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 6 ++++-- mlir/lib/Target/LLVM/ROCDL/Utils.cpp | 23 +++++++++++------------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 78d95af1e3253..a5c1ca8ae3c1f 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -2274,12 +2274,14 @@ KernelTableAttr::verify(function_ref emitError, } KernelAttr KernelTableAttr::lookup(StringRef key) const { - auto it = impl::findAttrSorted(begin(), end(), key); + std::pair::iterator, bool> it = + impl::findAttrSorted(begin(), end(), key); return it.second ? *it.first : KernelAttr(); } KernelAttr KernelTableAttr::lookup(StringAttr key) const { - auto it = impl::findAttrSorted(begin(), end(), key); + std::pair::iterator, bool> it = + impl::findAttrSorted(begin(), end(), key); return it.second ? *it.first : KernelAttr(); } diff --git a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp index 4829950a1818c..7e703924db0ed 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp @@ -119,18 +119,18 @@ static Attribute convertNode(Builder &builder, static Attribute convertNode(Builder &builder, llvm::msgpack::ArrayDocNode &node) { - using NodeKind = llvm::msgpack::Type; // Use `DenseIntAttr` if we know all the attrs are ints. if (llvm::all_of(node, [](llvm::msgpack::DocNode &n) { - auto kind = n.getKind(); - return kind == NodeKind::Int || kind == NodeKind::UInt; + llvm::msgpack::Type kind = n.getKind(); + return kind == llvm::msgpack::Type::Int || + kind == llvm::msgpack::Type::UInt; })) { SmallVector values; for (llvm::msgpack::DocNode &n : node) { llvm::msgpack::Type kind = n.getKind(); - if (kind == NodeKind::Int) + if (kind == llvm::msgpack::Type::Int) values.push_back(n.getInt()); - else if (kind == NodeKind::UInt) + else if (kind == llvm::msgpack::Type::UInt) values.push_back(n.getUInt()); } return builder.getDenseI64ArrayAttr(values); @@ -148,19 +148,18 @@ static Attribute convertNode(Builder &builder, static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node) { using namespace llvm::msgpack; - using NodeKind = llvm::msgpack::Type; switch (node.getKind()) { - case NodeKind::Int: + case llvm::msgpack::Type::Int: return builder.getI64IntegerAttr(node.getInt()); - case NodeKind::UInt: + case llvm::msgpack::Type::UInt: return builder.getI64IntegerAttr(node.getUInt()); - case NodeKind::Boolean: + case llvm::msgpack::Type::Boolean: return builder.getI64IntegerAttr(node.getBool()); - case NodeKind::String: + case llvm::msgpack::Type::String: return builder.getStringAttr(node.getString()); - case NodeKind::Array: + case llvm::msgpack::Type::Array: return convertNode(builder, node.getArray()); - case NodeKind::Map: + case llvm::msgpack::Type::Map: return convertNode(builder, node.getMap()); default: return nullptr; From 837050318ec5f1238f650baa34aab2c8f22d273e Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Tue, 30 Jul 2024 17:18:10 +0000 Subject: [PATCH 08/15] fix python bindings --- mlir/lib/Bindings/Python/DialectGPU.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Bindings/Python/DialectGPU.cpp b/mlir/lib/Bindings/Python/DialectGPU.cpp index d4df5d5b58130..560a54bcd1591 100644 --- a/mlir/lib/Bindings/Python/DialectGPU.cpp +++ b/mlir/lib/Bindings/Python/DialectGPU.cpp @@ -53,7 +53,7 @@ PYBIND11_MODULE(_mlirDialectsGPU, m) { py::buffer_info info(py::buffer(object).request()); MlirStringRef objectStrRef = mlirStringRefCreate(static_cast(info.ptr), info.size); - return cls(mlirGPUObjectAttrGet( + return cls(mlirGPUObjectAttrGetWithKernels( mlirAttributeGetContext(target), target, format, objectStrRef, mlirObjectProps.has_value() ? *mlirObjectProps : MlirAttribute{nullptr}, From 1ba1fc73452912ca2ed9c74af093275e1348b953 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Tue, 20 Aug 2024 14:46:34 +0000 Subject: [PATCH 09/15] Rebase and use LLVM offload utilities to get AMD metadata --- mlir/lib/Target/LLVM/CMakeLists.txt | 2 +- mlir/lib/Target/LLVM/ROCDL/Utils.cpp | 225 +++++---------------------- 2 files changed, 42 insertions(+), 185 deletions(-) diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt index 4999ce00ba5bc..bc14c568e46be 100644 --- a/mlir/lib/Target/LLVM/CMakeLists.txt +++ b/mlir/lib/Target/LLVM/CMakeLists.txt @@ -115,8 +115,8 @@ add_mlir_dialect_library(MLIRROCDLTarget OBJECT LINK_COMPONENTS + FrontendOffloading MCParser - Object ${AMDGPU_LIBS} LINK_LIBS PUBLIC diff --git a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp index 7e703924db0ed..44b8100103e7b 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp @@ -15,200 +15,57 @@ #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" #include "llvm/ADT/StringMap.h" -#include "llvm/BinaryFormat/MsgPackDocument.h" -#include "llvm/Object/ELFObjectFile.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/AMDGPUMetadata.h" +#include "llvm/Frontend/Offloading/Utility.h" using namespace mlir; using namespace mlir::ROCDL; -/// Search the ELF object and return an object containing the `amdhsa.kernels` -/// metadata note. Function adapted from: -/// llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp Also see -/// `amdhsa.kernels`: -/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3-metadata -template -static std::unique_ptr -getAMDHSANote(llvm::object::ELFObjectFile &elfObj) { - using namespace llvm; - using namespace llvm::object; - using namespace llvm::ELF; - const ELFFile &elf = elfObj.getELFFile(); - Expected secOrErr = elf.sections(); - if (!secOrErr) { - consumeError(secOrErr.takeError()); - return nullptr; - } - ArrayRef sections = *secOrErr; - for (const typename ELFT::Shdr §ion : sections) { - if (section.sh_type != ELF::SHT_NOTE) - continue; - size_t align = std::max(static_cast(section.sh_addralign), 4u); - Error err = Error::success(); - for (const typename ELFT::Note note : elf.notes(section, err)) { - StringRef name = note.getName(); - if (name != "AMDGPU") - continue; - uint32_t type = note.getType(); - if (type != ELF::NT_AMDGPU_METADATA) - continue; - ArrayRef desc = note.getDesc(align); - StringRef msgPackString = - StringRef(reinterpret_cast(desc.data()), desc.size()); - std::unique_ptr msgPackDoc( - new llvm::msgpack::Document()); - if (!msgPackDoc->readFromBlob(msgPackString, /*Multi=*/false)) - return nullptr; - if (msgPackDoc->getRoot().isScalar()) - return nullptr; - return msgPackDoc; - } - } - return nullptr; -} - -/// Return the `amdhsa.kernels` metadata in the ELF object or nullptr on -/// failure. This is a helper function that casts a generic `ObjectFile` to the -/// appropiate `ELFObjectFile`. -static std::unique_ptr -getAMDHSANote(ArrayRef elfData) { - using namespace llvm; - using namespace llvm::object; - if (elfData.empty()) - return nullptr; - MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()), "buffer"); - Expected> objOrErr = - ObjectFile::createELFObjectFile(buffer); - if (!objOrErr || !objOrErr.get()) { - // Drop the error. - llvm::consumeError(objOrErr.takeError()); - return nullptr; - } - ObjectFile &elf = *(objOrErr.get()); - if (auto *obj = dyn_cast(&elf)) - return getAMDHSANote(*obj); - else if (auto *obj = dyn_cast(&elf)) - return getAMDHSANote(*obj); - else if (auto *obj = dyn_cast(&elf)) - return getAMDHSANote(*obj); - else if (auto *obj = dyn_cast(&elf)) - return getAMDHSANote(*obj); - return nullptr; -} - -/// Utility functions for converting `llvm::msgpack::DocNode` nodes. -static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node); -static Attribute convertNode(Builder &builder, - llvm::msgpack::MapDocNode &node) { - NamedAttrList attrs; - for (auto &[keyNode, valueNode] : node) { - if (!keyNode.isString()) - continue; - StringRef key = keyNode.getString(); - if (Attribute attr = convertNode(builder, valueNode)) { - key.consume_front("."); - key.consume_back("."); - attrs.append(key, attr); - } - } - if (attrs.empty()) - return nullptr; - return builder.getDictionaryAttr(attrs); -} - -static Attribute convertNode(Builder &builder, - llvm::msgpack::ArrayDocNode &node) { - // Use `DenseIntAttr` if we know all the attrs are ints. - if (llvm::all_of(node, [](llvm::msgpack::DocNode &n) { - llvm::msgpack::Type kind = n.getKind(); - return kind == llvm::msgpack::Type::Int || - kind == llvm::msgpack::Type::UInt; - })) { - SmallVector values; - for (llvm::msgpack::DocNode &n : node) { - llvm::msgpack::Type kind = n.getKind(); - if (kind == llvm::msgpack::Type::Int) - values.push_back(n.getInt()); - else if (kind == llvm::msgpack::Type::UInt) - values.push_back(n.getUInt()); - } - return builder.getDenseI64ArrayAttr(values); - } - // Convert the array. - SmallVector attrs; - for (llvm::msgpack::DocNode &n : node) { - if (Attribute attr = convertNode(builder, n)) - attrs.push_back(attr); - } - if (attrs.empty()) - return nullptr; - return builder.getArrayAttr(attrs); -} - -static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node) { - using namespace llvm::msgpack; - switch (node.getKind()) { - case llvm::msgpack::Type::Int: - return builder.getI64IntegerAttr(node.getInt()); - case llvm::msgpack::Type::UInt: - return builder.getI64IntegerAttr(node.getUInt()); - case llvm::msgpack::Type::Boolean: - return builder.getI64IntegerAttr(node.getBool()); - case llvm::msgpack::Type::String: - return builder.getStringAttr(node.getString()); - case llvm::msgpack::Type::Array: - return convertNode(builder, node.getArray()); - case llvm::msgpack::Type::Map: - return convertNode(builder, node.getMap()); - default: - return nullptr; - } -} - -/// The following function should succeed for Code object V3 and above. std::optional> mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder, ArrayRef elfData) { - using namespace llvm::msgpack; - std::unique_ptr metadata = getAMDHSANote(elfData); - if (!metadata) + uint16_t elfABIVersion; + llvm::StringMap kernels; + llvm::MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()), + "buffer"); + // Get the metadata. + llvm::Error error = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage( + buffer, kernels, elfABIVersion); + // Return `nullopt` if the metadata couldn't be retrieved. + if (!error) { + llvm::consumeError(std::move(error)); return std::nullopt; + } + // Helper lambda for converting values. + auto getI32Array = [&builder](const uint32_t *array) { + return builder.getDenseI32ArrayAttr({static_cast(array[0]), + static_cast(array[1]), + static_cast(array[2])}); + }; DenseMap kernelMD; - DocNode &rootNode = (metadata)->getRoot(); - // Fail if `rootNode` is not a map -it should be for AMD Obj Ver 3. - if (!rootNode.isMap()) - return std::nullopt; - DocNode &kernels = rootNode.getMap()["amdhsa.kernels"]; - // Fail if `amdhsa.kernels` is not an array. - if (!kernels.isArray()) - return std::nullopt; - // Convert each of the kernels. - for (DocNode &kernel : kernels.getArray()) { - if (!kernel.isMap()) - continue; - MapDocNode &kernelMap = kernel.getMap(); - DocNode &nameNode = kernelMap[".name"]; - if (!nameNode.isString()) - continue; - StringRef name = nameNode.getString(); - NamedAttrList attrList; - // Convert the kernel properties. - for (auto &[keyNode, valueNode] : kernelMap) { - if (!keyNode.isString()) - continue; - StringRef key = keyNode.getString(); - key.consume_front("."); - key.consume_back("."); - if (key == "name") - continue; - if (Attribute attr = convertNode(builder, valueNode)) - attrList.append(key, attr); - } - if (!attrList.empty()) - kernelMD[builder.getStringAttr(name)] = std::move(attrList); + for (const auto &[name, kernel] : kernels) { + NamedAttrList attrs; + // Add kernel metadata. + attrs.append("agpr_count", builder.getI64IntegerAttr(kernel.AGPRCount)); + attrs.append("sgpr_count", builder.getI64IntegerAttr(kernel.SGPRCount)); + attrs.append("vgpr_count", builder.getI64IntegerAttr(kernel.VGPRCount)); + attrs.append("sgpr_spill_count", + builder.getI64IntegerAttr(kernel.SGPRSpillCount)); + attrs.append("vgpr_spill_count", + builder.getI64IntegerAttr(kernel.VGPRSpillCount)); + attrs.append("wavefront_size", + builder.getI64IntegerAttr(kernel.WavefrontSize)); + attrs.append("max_flat_workgroup_size", + builder.getI64IntegerAttr(kernel.MaxFlatWorkgroupSize)); + attrs.append("group_segment_fixed_size", + builder.getI64IntegerAttr(kernel.GroupSegmentList)); + attrs.append("private_segment_fixed_size", + builder.getI64IntegerAttr(kernel.PrivateSegmentSize)); + attrs.append("reqd_workgroup_size", + getI32Array(kernel.RequestedWorkgroupSize)); + attrs.append("workgroup_size_hint", getI32Array(kernel.WorkgroupSizeHint)); + kernelMD[builder.getStringAttr(name)] = std::move(attrs); } - return kernelMD; + return std::move(kernelMD); } gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule, From a4ad4f18da2532b0032130bb88deb9f2b8659c71 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Tue, 27 Aug 2024 15:18:48 +0000 Subject: [PATCH 10/15] address reviewer comments --- mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td | 4 ++-- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 10 ++++------ mlir/lib/Target/LLVM/ROCDL/Target.cpp | 7 +++++-- mlir/lib/Target/LLVM/ROCDL/Utils.cpp | 2 +- mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp | 2 +- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td index 0cc51620d0b6a..dba1186af397f 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td +++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td @@ -120,10 +120,10 @@ def GPU_KernelTableAttr : GPU_Attr<"KernelTable", "kernel_table"> { Examples: ```mlir - // Empty table + // Empty table. #gpu.kernel_table<> - // Table with a single kernel + // Table with a single kernel. #gpu.kernel_table<[#gpu.kernel () >]> // Table with multiple kernels. diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index a5c1ca8ae3c1f..47f97c9635fb9 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -2274,15 +2274,13 @@ KernelTableAttr::verify(function_ref emitError, } KernelAttr KernelTableAttr::lookup(StringRef key) const { - std::pair::iterator, bool> it = - impl::findAttrSorted(begin(), end(), key); - return it.second ? *it.first : KernelAttr(); + auto [iterator, found] = impl::findAttrSorted(begin(), end(), key); + return found ? *iterator : KernelAttr(); } KernelAttr KernelTableAttr::lookup(StringAttr key) const { - std::pair::iterator, bool> it = - impl::findAttrSorted(begin(), end(), key); - return it.second ? *it.first : KernelAttr(); + auto [iterator, found] = impl::findAttrSorted(begin(), end(), key); + return found ? *iterator : KernelAttr(); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp index 5b0e81d16a07b..d8a79a7e80d64 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp @@ -506,12 +506,15 @@ ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module, gpu::CompilationTarget format = options.getCompilationTarget(); // If format is `fatbin` transform it to binary as `fatbin` is not yet // supported. - if (format > gpu::CompilationTarget::Binary) + gpu::KernelTableAttr kernels; + if (format > gpu::CompilationTarget::Binary) { format = gpu::CompilationTarget::Binary; + kernels = ROCDL::getKernelMetadata(module, object); + } DictionaryAttr properties{}; Builder builder(attribute.getContext()); StringAttr objectStr = builder.getStringAttr(StringRef(object.data(), object.size())); return builder.getAttr(attribute, format, objectStr, - properties, nullptr); + properties, kernels); } diff --git a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp index 44b8100103e7b..e1226997cedf7 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp @@ -31,7 +31,7 @@ mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder, llvm::Error error = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage( buffer, kernels, elfABIVersion); // Return `nullopt` if the metadata couldn't be retrieved. - if (!error) { + if (error) { llvm::consumeError(std::move(error)); return std::nullopt; } diff --git a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp index 37dbfe6203687..3e9fe7b87cd1f 100644 --- a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp +++ b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp @@ -116,7 +116,7 @@ TargetAttrImpl::createObject(Attribute attribute, Operation *module, module->getContext(), attribute, gpu::CompilationTarget::Offload, StringAttr::get(module->getContext(), StringRef(object.data(), object.size())), - module->getAttrDictionary()); + module->getAttrDictionary(), nullptr); } TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(TargetAttrAPI)) { From d2873103483bb4b23888e67b73d7f394c1a3c029 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Tue, 27 Aug 2024 15:47:22 -0400 Subject: [PATCH 11/15] Update mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp Co-authored-by: Mehdi Amini --- mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp index 3e9fe7b87cd1f..aaa281e07933b 100644 --- a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp +++ b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp @@ -116,7 +116,7 @@ TargetAttrImpl::createObject(Attribute attribute, Operation *module, module->getContext(), attribute, gpu::CompilationTarget::Offload, StringAttr::get(module->getContext(), StringRef(object.data(), object.size())), - module->getAttrDictionary(), nullptr); + module->getAttrDictionary(), /*kernels=*/nullptr); } TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(TargetAttrAPI)) { From 38e53cc6b26c0bdfe4705092aef3aa926f179ab0 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Tue, 27 Aug 2024 15:47:32 -0400 Subject: [PATCH 12/15] Update mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td Co-authored-by: Mehdi Amini --- mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td index dba1186af397f..00201554ea3b0 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td +++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td @@ -23,7 +23,7 @@ include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" def GPU_KernelAttr : GPU_Attr<"Kernel", "kernel"> { let description = [{ GPU attribute for storing metadata related to a compiled kernel. The - attribute contains the name and function type of the kernel. + attribute contains the name and arguments type of the kernel. The attribute also contains optional parameters for storing the arguments attributes as well as a dictionary for additional metadata, like occupancy From d156a9fb379e029c19fde3b371c9f082aca0fe70 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Tue, 27 Aug 2024 15:54:16 -0400 Subject: [PATCH 13/15] Apply suggestions from code review Co-authored-by: Mehdi Amini --- mlir/lib/Target/LLVM/NVVM/Target.cpp | 2 +- mlir/lib/Target/SPIRV/Target.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp index 7e8e3ff6ed59a..806c405ac17df 100644 --- a/mlir/lib/Target/LLVM/NVVM/Target.cpp +++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp @@ -604,5 +604,5 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module, return builder.getAttr( attribute, format, builder.getStringAttr(StringRef(object.data(), object.size())), - objectProps, nullptr); + objectProps, /*kernels=*/nullptr); } diff --git a/mlir/lib/Target/SPIRV/Target.cpp b/mlir/lib/Target/SPIRV/Target.cpp index e75966a20db45..dd128e254aa0d 100644 --- a/mlir/lib/Target/SPIRV/Target.cpp +++ b/mlir/lib/Target/SPIRV/Target.cpp @@ -98,5 +98,5 @@ SPIRVTargetAttrImpl::createObject(Attribute attribute, Operation *module, return builder.getAttr( attribute, format, builder.getStringAttr(StringRef(object.data(), object.size())), - objectProps, nullptr); + objectProps, /*kernels=*/nullptr); } From 9eaaba1cbcaf2f579194cc553ee030c7ec12970b Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Tue, 27 Aug 2024 21:07:02 +0000 Subject: [PATCH 14/15] address reviewer comments; add python tests and change KernelAttr to KernelMetadataAttr --- .../mlir/Dialect/GPU/IR/CompilationAttrs.td | 34 ++++++------ mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 52 ++++++++++--------- mlir/lib/Target/LLVM/ROCDL/Utils.cpp | 4 +- mlir/test/Dialect/GPU/invalid.mlir | 4 +- mlir/test/Dialect/GPU/ops.mlir | 12 ++--- mlir/test/python/dialects/gpu/dialect.py | 7 +++ .../Target/LLVM/SerializeROCDLTarget.cpp | 4 +- 7 files changed, 63 insertions(+), 54 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td index 00201554ea3b0..07879a0dab07f 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td +++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td @@ -17,10 +17,10 @@ include "mlir/Dialect/GPU/IR/GPUBase.td" include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td" //===----------------------------------------------------------------------===// -// GPU kernel attribute +// GPU kernel metadata attribute //===----------------------------------------------------------------------===// -def GPU_KernelAttr : GPU_Attr<"Kernel", "kernel"> { +def GPU_KernelMetadataAttr : GPU_Attr<"KernelMetadata", "kernel_metadata"> { let description = [{ GPU attribute for storing metadata related to a compiled kernel. The attribute contains the name and arguments type of the kernel. @@ -34,8 +34,8 @@ def GPU_KernelAttr : GPU_Attr<"Kernel", "kernel"> { Examples: ```mlir - #gpu.kernel<@kernel1, (i32) -> (), arg_attrs = [...], metadata = {reg_count = 255, ...}> - #gpu.kernel<@kernel2, (i32, f64) -> ()> + #gpu.kernel_metadata<@kernel1, (i32) -> (), arg_attrs = [...], metadata = {reg_count = 255, ...}> + #gpu.kernel_metadata<@kernel2, (i32, f64) -> ()> ``` }]; let parameters = (ins @@ -63,7 +63,7 @@ def GPU_KernelAttr : GPU_Attr<"Kernel", "kernel"> { let genVerifyDecl = 1; let extraClassDeclaration = [{ /// Compare two kernels based on the name. - bool operator<(const KernelAttr& other) const { + bool operator<(const KernelMetadataAttr& other) const { return getName().getValue() < other.getName().getValue(); } @@ -103,8 +103,8 @@ def GPU_KernelAttr : GPU_Attr<"Kernel", "kernel"> { return argDict ? argDict.get(name) : nullptr; } - /// Returns a new KernelAttr that contains `attrs` in the metadata dictionary. - KernelAttr appendMetadata(ArrayRef attrs) const; + /// Returns a new KernelMetadataAttr that contains `attrs` in the metadata dictionary. + KernelMetadataAttr appendMetadata(ArrayRef attrs) const; }]; } @@ -114,7 +114,7 @@ def GPU_KernelAttr : GPU_Attr<"Kernel", "kernel"> { def GPU_KernelTableAttr : GPU_Attr<"KernelTable", "kernel_table"> { let description = [{ - GPU attribute representing a list of `#gpu.kernel` attributes. This + GPU attribute representing a list of `#gpu.kernel_metadata` attributes. This attribute supports searching kernels by name. All kernels in the table must have an unique name. @@ -124,32 +124,32 @@ def GPU_KernelTableAttr : GPU_Attr<"KernelTable", "kernel_table"> { #gpu.kernel_table<> // Table with a single kernel. - #gpu.kernel_table<[#gpu.kernel () >]> + #gpu.kernel_table<[#gpu.kernel_metadata () >]> // Table with multiple kernels. #gpu.kernel_table<[ - #gpu.kernel<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>, - #gpu.kernel<"kernel1", (i32) -> ()> + #gpu.kernel_metadata<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>, + #gpu.kernel_metadata<"kernel1", (i32) -> ()> ]> ``` }]; let parameters = (ins - OptionalArrayRefParameter<"KernelAttr", "array of kernels">:$kernel_table + OptionalArrayRefParameter<"KernelMetadataAttr", "array of kernels">:$kernel_table ); let assemblyFormat = [{ `<` (`[` qualified($kernel_table)^ `]`)? `>` }]; let builders = [ - AttrBuilder<(ins "ArrayRef":$kernels, + AttrBuilder<(ins "ArrayRef":$kernels, CArg<"bool", "false">:$isSorted)> ]; let skipDefaultBuilders = 1; let genVerifyDecl = 1; let extraClassDeclaration = [{ - llvm::ArrayRef::iterator begin() const { + llvm::ArrayRef::iterator begin() const { return getKernelTable().begin(); } - llvm::ArrayRef::iterator end() const { + llvm::ArrayRef::iterator end() const { return getKernelTable().end(); } size_t size() const { @@ -160,8 +160,8 @@ def GPU_KernelTableAttr : GPU_Attr<"KernelTable", "kernel_table"> { } /// Returns the kernel with name `key` or `nullptr` if not present. - KernelAttr lookup(StringRef key) const; - KernelAttr lookup(StringAttr key) const; + KernelMetadataAttr lookup(StringRef key) const; + KernelMetadataAttr lookup(StringAttr key) const; }]; } diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 47f97c9635fb9..e45ba7838b453 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -2179,38 +2179,41 @@ LogicalResult gpu::DynamicSharedMemoryOp::verify() { } //===----------------------------------------------------------------------===// -// GPU KernelAttr +// GPU KernelMetadataAttr //===----------------------------------------------------------------------===// -KernelAttr KernelAttr::get(FunctionOpInterface kernel, - DictionaryAttr metadata) { +KernelMetadataAttr KernelMetadataAttr::get(FunctionOpInterface kernel, + DictionaryAttr metadata) { assert(kernel && "invalid kernel"); return get(kernel.getNameAttr(), kernel.getFunctionType(), kernel.getAllArgAttrs(), metadata); } -KernelAttr KernelAttr::getChecked(function_ref emitError, - FunctionOpInterface kernel, - DictionaryAttr metadata) { +KernelMetadataAttr +KernelMetadataAttr::getChecked(function_ref emitError, + FunctionOpInterface kernel, + DictionaryAttr metadata) { assert(kernel && "invalid kernel"); return getChecked(emitError, kernel.getNameAttr(), kernel.getFunctionType(), kernel.getAllArgAttrs(), metadata); } -KernelAttr KernelAttr::appendMetadata(ArrayRef attrs) const { +KernelMetadataAttr +KernelMetadataAttr::appendMetadata(ArrayRef attrs) const { if (attrs.empty()) return *this; NamedAttrList attrList; if (DictionaryAttr dict = getMetadata()) attrList.append(dict); attrList.append(attrs); - return KernelAttr::get(getName(), getFunctionType(), getArgAttrs(), - attrList.getDictionary(getContext())); + return KernelMetadataAttr::get(getName(), getFunctionType(), getArgAttrs(), + attrList.getDictionary(getContext())); } -LogicalResult KernelAttr::verify(function_ref emitError, - StringAttr name, Type functionType, - ArrayAttr argAttrs, DictionaryAttr metadata) { +LogicalResult +KernelMetadataAttr::verify(function_ref emitError, + StringAttr name, Type functionType, + ArrayAttr argAttrs, DictionaryAttr metadata) { if (name.empty()) return emitError() << "the kernel name can't be empty"; if (argAttrs) { @@ -2228,7 +2231,7 @@ LogicalResult KernelAttr::verify(function_ref emitError, //===----------------------------------------------------------------------===// KernelTableAttr KernelTableAttr::get(MLIRContext *context, - ArrayRef kernels, + ArrayRef kernels, bool isSorted) { // Note that `is_sorted` is always only invoked once even with assertions ON. assert((!isSorted || llvm::is_sorted(kernels)) && @@ -2237,15 +2240,14 @@ KernelTableAttr KernelTableAttr::get(MLIRContext *context, if (isSorted || llvm::is_sorted(kernels)) return Base::get(context, kernels); // Sort the array. - SmallVector kernelsTmp(kernels); + SmallVector kernelsTmp(kernels); llvm::array_pod_sort(kernelsTmp.begin(), kernelsTmp.end()); return Base::get(context, kernelsTmp); } -KernelTableAttr -KernelTableAttr::getChecked(function_ref emitError, - MLIRContext *context, ArrayRef kernels, - bool isSorted) { +KernelTableAttr KernelTableAttr::getChecked( + function_ref emitError, MLIRContext *context, + ArrayRef kernels, bool isSorted) { // Note that `is_sorted` is always only invoked once even with assertions ON. assert((!isSorted || llvm::is_sorted(kernels)) && "expected a sorted kernel array"); @@ -2253,19 +2255,19 @@ KernelTableAttr::getChecked(function_ref emitError, if (isSorted || llvm::is_sorted(kernels)) return Base::getChecked(emitError, context, kernels); // Sort the array. - SmallVector kernelsTmp(kernels); + SmallVector kernelsTmp(kernels); llvm::array_pod_sort(kernelsTmp.begin(), kernelsTmp.end()); return Base::getChecked(emitError, context, kernelsTmp); } LogicalResult KernelTableAttr::verify(function_ref emitError, - ArrayRef kernels) { + ArrayRef kernels) { if (kernels.size() < 2) return success(); // Check that the kernels are uniquely named. if (std::adjacent_find(kernels.begin(), kernels.end(), - [](KernelAttr l, KernelAttr r) { + [](KernelMetadataAttr l, KernelMetadataAttr r) { return l.getName() == r.getName(); }) != kernels.end()) { return emitError() << "expected all kernels to be uniquely named"; @@ -2273,14 +2275,14 @@ KernelTableAttr::verify(function_ref emitError, return success(); } -KernelAttr KernelTableAttr::lookup(StringRef key) const { +KernelMetadataAttr KernelTableAttr::lookup(StringRef key) const { auto [iterator, found] = impl::findAttrSorted(begin(), end(), key); - return found ? *iterator : KernelAttr(); + return found ? *iterator : KernelMetadataAttr(); } -KernelAttr KernelTableAttr::lookup(StringAttr key) const { +KernelMetadataAttr KernelTableAttr::lookup(StringAttr key) const { auto [iterator, found] = impl::findAttrSorted(begin(), end(), key); - return found ? *iterator : KernelAttr(); + return found ? *iterator : KernelMetadataAttr(); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp index e1226997cedf7..04b1b22279e5d 100644 --- a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp +++ b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp @@ -72,13 +72,13 @@ gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule, ArrayRef elfData) { auto module = cast(gpuModule); Builder builder(module.getContext()); - SmallVector kernels; + SmallVector kernels; std::optional> mdMapOrNull = getAMDHSAKernelsELFMetadata(builder, elfData); for (auto funcOp : module.getBody()->getOps()) { if (!funcOp->getDiscardableAttr("rocdl.kernel")) continue; - kernels.push_back(gpu::KernelAttr::get( + kernels.push_back(gpu::KernelMetadataAttr::get( funcOp, mdMapOrNull ? builder.getDictionaryAttr( mdMapOrNull->lookup(funcOp.getNameAttr())) : nullptr)); diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir index 8c0e0f1a87fd7..fd7618020b5d8 100644 --- a/mlir/test/Dialect/GPU/invalid.mlir +++ b/mlir/test/Dialect/GPU/invalid.mlir @@ -854,8 +854,8 @@ module attributes {gpu.container_module} { gpu.binary @binary [#gpu.object<#rocdl.target, // expected-error@+1{{expected all kernels to be uniquely named}} kernels = #gpu.kernel_table<[ - #gpu.kernel<"kernel", (i32) -> ()>, - #gpu.kernel<"kernel", (i32, f32) -> (), metadata = {sgpr_count = 255}> + #gpu.kernel_metadata<"kernel", (i32) -> ()>, + #gpu.kernel_metadata<"kernel", (i32, f32) -> (), metadata = {sgpr_count = 255}> // expected-error@below{{failed to parse GPU_ObjectAttr parameter 'kernels' which is to be a `KernelTableAttr`}} ]>, bin = "BLOB"> diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir index f80dca7552623..b9c0a0e79e8f2 100644 --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -446,8 +446,8 @@ gpu.module @module_with_offload_handler <#gpu.select_object<0>> [#nvvm.target] { gpu.binary @kernel_attrs_1 [ #gpu.object<#rocdl.target, kernels = #gpu.kernel_table<[ - #gpu.kernel<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>, - #gpu.kernel<"kernel1", (i32) -> (), arg_attrs = [{llvm.read_only}]> + #gpu.kernel_metadata<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>, + #gpu.kernel_metadata<"kernel1", (i32) -> (), arg_attrs = [{llvm.read_only}]> ]>, bin = "BLOB"> ] @@ -455,12 +455,12 @@ gpu.binary @kernel_attrs_1 [ // Verify the kernels are sorted // CHECK-LABEL: gpu.binary @kernel_attrs_2 gpu.binary @kernel_attrs_2 [ - // CHECK: [#gpu.kernel<"a_kernel", () -> ()>, #gpu.kernel<"m_kernel", () -> ()>, #gpu.kernel<"z_kernel", () -> ()>] + // CHECK: [#gpu.kernel_metadata<"a_kernel", () -> ()>, #gpu.kernel_metadata<"m_kernel", () -> ()>, #gpu.kernel_metadata<"z_kernel", () -> ()>] #gpu.object<#rocdl.target, kernels = #gpu.kernel_table<[ - #gpu.kernel<"z_kernel", () -> ()>, - #gpu.kernel<"m_kernel", () -> ()>, - #gpu.kernel<"a_kernel", () -> ()> + #gpu.kernel_metadata<"z_kernel", () -> ()>, + #gpu.kernel_metadata<"m_kernel", () -> ()>, + #gpu.kernel_metadata<"a_kernel", () -> ()> ]>, bin = "BLOB"> ] diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py index aded35b04aa1e..9ab8db9fc6c82 100644 --- a/mlir/test/python/dialects/gpu/dialect.py +++ b/mlir/test/python/dialects/gpu/dialect.py @@ -55,3 +55,10 @@ def testObjectAttr(): # CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.0\0A.target sm_50"> print(o) assert o.object == object + + object = b"BC\xc0\xde5\x14\x00\x00\x05\x00\x00\x00b\x0c0$MY\xbef" + kernelTable = Attribute.parse("#gpu.kernel_table<[#gpu.kernel_metadata<\"kernel\", () -> ()>]>") + o = gpu.ObjectAttr.get(target, format, object, kernels=kernelTable) + # CHECK: #gpu.object<#nvvm.target, kernels = <[#gpu.kernel_metadata<"kernel", () -> ()>]>, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf"> + print(o) + assert o.kernels == kernelTable diff --git a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp index 100c483c515de..43fa3d850d9e2 100644 --- a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp +++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp @@ -213,8 +213,8 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(GetELFMetadata)) { ASSERT_TRUE(metadata.lookup("rocdl_kernel_0") != nullptr); // Check that the kernel doesn't exist. ASSERT_TRUE(metadata.lookup("not_existent_kernel") == nullptr); - // Test the `ROCDLObjectMDAttr` iterators. - for (gpu::KernelAttr kernel : metadata) { + // Test the `KernelMetadataAttr` iterators. + for (gpu::KernelMetadataAttr kernel : metadata) { // Check that the ELF metadata is present. ASSERT_TRUE(kernel.getMetadata() != nullptr); // Verify that `sgpr_count` is present and it is an integer attribute. From 472165d0096bb361a612756c8ad18c31b6746cf2 Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Tue, 27 Aug 2024 21:25:37 +0000 Subject: [PATCH 15/15] fix python format --- mlir/test/python/dialects/gpu/dialect.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py index 9ab8db9fc6c82..26ee9f34cb332 100644 --- a/mlir/test/python/dialects/gpu/dialect.py +++ b/mlir/test/python/dialects/gpu/dialect.py @@ -57,7 +57,9 @@ def testObjectAttr(): assert o.object == object object = b"BC\xc0\xde5\x14\x00\x00\x05\x00\x00\x00b\x0c0$MY\xbef" - kernelTable = Attribute.parse("#gpu.kernel_table<[#gpu.kernel_metadata<\"kernel\", () -> ()>]>") + kernelTable = Attribute.parse( + '#gpu.kernel_table<[#gpu.kernel_metadata<"kernel", () -> ()>]>' + ) o = gpu.ObjectAttr.get(target, format, object, kernels=kernelTable) # CHECK: #gpu.object<#nvvm.target, kernels = <[#gpu.kernel_metadata<"kernel", () -> ()>]>, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf"> print(o)