diff --git a/llvm/include/llvm/Frontend/Offloading/Utility.h b/llvm/include/llvm/Frontend/Offloading/Utility.h index f54dd7ba7ab45..abaea843848b2 100644 --- a/llvm/include/llvm/Frontend/Offloading/Utility.h +++ b/llvm/include/llvm/Frontend/Offloading/Utility.h @@ -9,8 +9,14 @@ #ifndef LLVM_FRONTEND_OFFLOADING_UTILITY_H #define LLVM_FRONTEND_OFFLOADING_UTILITY_H +#include + +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/IR/Module.h" #include "llvm/Object/OffloadBinary.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBufferRef.h" namespace llvm { namespace offloading { @@ -73,6 +79,60 @@ getOffloadingEntryInitializer(Module &M, Constant *Addr, StringRef Name, std::pair getOffloadEntryArray(Module &M, StringRef SectionName); +namespace amdgpu { +/// Check if an image is compatible with current system's environment. The +/// system environment is given as a 'target-id' which has the form: +/// +/// := ( ":" ( "+" | "-" ) )* +/// +/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any' +/// and is compatible with either '+' or '-'. The HSA runtime returns this +/// information using the target-id, while we use the ELF header to determine +/// these features. +bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags, + StringRef EnvTargetID); + +/// Struct for holding metadata related to AMDGPU kernels, for more information +/// about the metadata and its meaning see: +/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3 +struct AMDGPUKernelMetaData { + /// Constant indicating that a value is invalid. + static constexpr uint32_t KInvalidValue = + std::numeric_limits::max(); + /// The amount of group segment memory required by a work-group in bytes. + uint32_t GroupSegmentList = KInvalidValue; + /// The amount of fixed private address space memory required for a work-item + /// in bytes. + uint32_t PrivateSegmentSize = KInvalidValue; + /// Number of scalar registers required by a wavefront. + uint32_t SGPRCount = KInvalidValue; + /// Number of vector registers required by each work-item. + uint32_t VGPRCount = KInvalidValue; + /// Number of stores from a scalar register to a register allocator created + /// spill location. + uint32_t SGPRSpillCount = KInvalidValue; + /// Number of stores from a vector register to a register allocator created + /// spill location. + uint32_t VGPRSpillCount = KInvalidValue; + /// Number of accumulator registers required by each work-item. + uint32_t AGPRCount = KInvalidValue; + /// Corresponds to the OpenCL reqd_work_group_size attribute. + uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue, + KInvalidValue}; + /// Corresponds to the OpenCL work_group_size_hint attribute. + uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue}; + /// Wavefront size. + uint32_t WavefrontSize = KInvalidValue; + /// Maximum flat work-group size supported by the kernel in work-items. + uint32_t MaxFlatWorkgroupSize = KInvalidValue; +}; + +/// Reads AMDGPU specific metadata from the ELF file and propagates the +/// KernelInfoMap. +Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer, + StringMap &KernelInfoMap, + uint16_t &ELFABIVersion); +} // namespace amdgpu } // namespace offloading } // namespace llvm diff --git a/llvm/lib/Frontend/Offloading/CMakeLists.txt b/llvm/lib/Frontend/Offloading/CMakeLists.txt index 16e0dcfa0e90d..ce445ad9cc4cb 100644 --- a/llvm/lib/Frontend/Offloading/CMakeLists.txt +++ b/llvm/lib/Frontend/Offloading/CMakeLists.txt @@ -11,6 +11,7 @@ add_llvm_component_library(LLVMFrontendOffloading LINK_COMPONENTS Core BinaryFormat + Object Support TransformUtils TargetParser diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp index 919b9462e32d4..010c0bfd3be76 100644 --- a/llvm/lib/Frontend/Offloading/Utility.cpp +++ b/llvm/lib/Frontend/Offloading/Utility.cpp @@ -7,10 +7,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Frontend/Offloading/Utility.h" +#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MsgPackDocument.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Value.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/MemoryBufferRef.h" +#include "llvm/Support/YAMLTraits.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; @@ -126,3 +132,229 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) { return std::make_pair(EntriesB, EntriesE); } + +bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch, + uint32_t ImageFlags, + StringRef EnvTargetID) { + using namespace llvm::ELF; + StringRef EnvArch = EnvTargetID.split(":").first; + + // Trivial check if the base processors match. + if (EnvArch != ImageArch) + return false; + + // Check if the image is requesting xnack on or off. + switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) { + case EF_AMDGPU_FEATURE_XNACK_OFF_V4: + // The image is 'xnack-' so the environment must be 'xnack-'. + if (!EnvTargetID.contains("xnack-")) + return false; + break; + case EF_AMDGPU_FEATURE_XNACK_ON_V4: + // The image is 'xnack+' so the environment must be 'xnack+'. + if (!EnvTargetID.contains("xnack+")) + return false; + break; + case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4: + case EF_AMDGPU_FEATURE_XNACK_ANY_V4: + default: + break; + } + + // Check if the image is requesting sramecc on or off. + switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) { + case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4: + // The image is 'sramecc-' so the environment must be 'sramecc-'. + if (!EnvTargetID.contains("sramecc-")) + return false; + break; + case EF_AMDGPU_FEATURE_SRAMECC_ON_V4: + // The image is 'sramecc+' so the environment must be 'sramecc+'. + if (!EnvTargetID.contains("sramecc+")) + return false; + break; + case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4: + case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4: + break; + } + + return true; +} + +namespace { +/// Reads the AMDGPU specific per-kernel-metadata from an image. +class KernelInfoReader { +public: + KernelInfoReader(StringMap &KIM) + : KernelInfoMap(KIM) {} + + /// Process ELF note to read AMDGPU metadata from respective information + /// fields. + Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) { + if (Note.getName() != "AMDGPU") + return Error::success(); // We are not interested in other things + + assert(Note.getType() == ELF::NT_AMDGPU_METADATA && + "Parse AMDGPU MetaData"); + auto Desc = Note.getDesc(Align); + StringRef MsgPackString = + StringRef(reinterpret_cast(Desc.data()), Desc.size()); + msgpack::Document MsgPackDoc; + if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false)) + return Error::success(); + + AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true); + if (!Verifier.verify(MsgPackDoc.getRoot())) + return Error::success(); + + auto RootMap = MsgPackDoc.getRoot().getMap(true); + + if (auto Err = iterateAMDKernels(RootMap)) + return Err; + + return Error::success(); + } + +private: + /// Extracts the relevant information via simple string look-up in the msgpack + /// document elements. + Error + extractKernelData(msgpack::MapDocNode::MapTy::value_type V, + std::string &KernelName, + offloading::amdgpu::AMDGPUKernelMetaData &KernelData) { + if (!V.first.isString()) + return Error::success(); + + const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) { + return DK.getString() == SK; + }; + + const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN, + uint32_t *Vals) { + assert(DN.isArray() && "MsgPack DocNode is an array node"); + auto DNA = DN.getArray(); + assert(DNA.size() == 3 && "ArrayNode has at most three elements"); + + int I = 0; + for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd; + ++DNABegin) { + Vals[I++] = DNABegin->getUInt(); + } + }; + + if (IsKey(V.first, ".name")) { + KernelName = V.second.toString(); + } else if (IsKey(V.first, ".sgpr_count")) { + KernelData.SGPRCount = V.second.getUInt(); + } else if (IsKey(V.first, ".sgpr_spill_count")) { + KernelData.SGPRSpillCount = V.second.getUInt(); + } else if (IsKey(V.first, ".vgpr_count")) { + KernelData.VGPRCount = V.second.getUInt(); + } else if (IsKey(V.first, ".vgpr_spill_count")) { + KernelData.VGPRSpillCount = V.second.getUInt(); + } else if (IsKey(V.first, ".agpr_count")) { + KernelData.AGPRCount = V.second.getUInt(); + } else if (IsKey(V.first, ".private_segment_fixed_size")) { + KernelData.PrivateSegmentSize = V.second.getUInt(); + } else if (IsKey(V.first, ".group_segment_fixed_size")) { + KernelData.GroupSegmentList = V.second.getUInt(); + } else if (IsKey(V.first, ".reqd_workgroup_size")) { + GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize); + } else if (IsKey(V.first, ".workgroup_size_hint")) { + GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint); + } else if (IsKey(V.first, ".wavefront_size")) { + KernelData.WavefrontSize = V.second.getUInt(); + } else if (IsKey(V.first, ".max_flat_workgroup_size")) { + KernelData.MaxFlatWorkgroupSize = V.second.getUInt(); + } + + return Error::success(); + } + + /// Get the "amdhsa.kernels" element from the msgpack Document + Expected getAMDKernelsArray(msgpack::MapDocNode &MDN) { + auto Res = MDN.find("amdhsa.kernels"); + if (Res == MDN.end()) + return createStringError(inconvertibleErrorCode(), + "Could not find amdhsa.kernels key"); + + auto Pair = *Res; + assert(Pair.second.isArray() && + "AMDGPU kernel entries are arrays of entries"); + + return Pair.second.getArray(); + } + + /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a + /// MapDocNode that either maps a string to a single value (most of them) or + /// to another array of things. Currently, we only handle the case that maps + /// to scalar value. + Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) { + offloading::amdgpu::AMDGPUKernelMetaData KernelData; + std::string KernelName; + auto Entry = (*It).getMap(); + for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI) + if (auto Err = extractKernelData(*MI, KernelName, KernelData)) + return Err; + + KernelInfoMap.insert({KernelName, KernelData}); + return Error::success(); + } + + /// Go over the list of AMD kernels in the "amdhsa.kernels" entry + Error iterateAMDKernels(msgpack::MapDocNode &MDN) { + auto KernelsOrErr = getAMDKernelsArray(MDN); + if (auto Err = KernelsOrErr.takeError()) + return Err; + + auto KernelsArr = *KernelsOrErr; + for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) { + if (!It->isMap()) + continue; // we expect pairs + + // Obtain the value for the different entries. Each array entry is a + // MapDocNode + if (auto Err = generateKernelInfo(It)) + return Err; + } + return Error::success(); + } + + // Kernel names are the keys + StringMap &KernelInfoMap; +}; +} // namespace + +Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage( + MemoryBufferRef MemBuffer, + StringMap &KernelInfoMap, + uint16_t &ELFABIVersion) { + Error Err = Error::success(); // Used later as out-parameter + + auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer()); + if (auto Err = ELFOrError.takeError()) + return Err; + + const object::ELF64LEFile ELFObj = ELFOrError.get(); + Expected> Sections = ELFObj.sections(); + if (!Sections) + return Sections.takeError(); + KernelInfoReader Reader(KernelInfoMap); + + // Read the code object version from ELF image header + auto Header = ELFObj.getHeader(); + ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]); + for (const auto &S : *Sections) { + if (S.sh_type != ELF::SHT_NOTE) + continue; + + for (const auto N : ELFObj.notes(S, Err)) { + if (Err) + return Err; + // Fills the KernelInfoTabel entries in the reader + if ((Err = Reader.processNote(N, S.sh_addralign))) + return Err; + } + } + return Error::success(); +} diff --git a/offload/plugins-nextgen/amdgpu/CMakeLists.txt b/offload/plugins-nextgen/amdgpu/CMakeLists.txt index 47cd2feefc728..b40c62d43226f 100644 --- a/offload/plugins-nextgen/amdgpu/CMakeLists.txt +++ b/offload/plugins-nextgen/amdgpu/CMakeLists.txt @@ -10,11 +10,12 @@ target_include_directories(omptarget.rtl.amdgpu PRIVATE if(hsa-runtime64_FOUND AND NOT "amdgpu" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS) message(STATUS "Building AMDGPU plugin linked against libhsa") - target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64) + target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloading) else() message(STATUS "Building AMDGPU plugin for dlopened libhsa") target_include_directories(omptarget.rtl.amdgpu PRIVATE dynamic_hsa) target_sources(omptarget.rtl.amdgpu PRIVATE dynamic_hsa/hsa.cpp) + target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloading) endif() # Configure testing for the AMDGPU plugin. We will build tests if we could a diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 604683370cd27..a434a0089d5f9 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -485,7 +485,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy { findDeviceSymbol(GenericDeviceTy &Device, StringRef SymbolName) const; /// Get additional info for kernel, e.g., register spill counts - std::optional + std::optional getKernelInfo(StringRef Identifier) const { auto It = KernelInfoMap.find(Identifier); @@ -499,7 +499,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy { /// The exectuable loaded on the agent. hsa_executable_t Executable; hsa_code_object_t CodeObject; - StringMap KernelInfoMap; + StringMap KernelInfoMap; uint16_t ELFABIVersion; }; @@ -600,7 +600,7 @@ struct AMDGPUKernelTy : public GenericKernelTy { uint32_t ImplicitArgsSize; /// Additional Info for the AMD GPU Kernel - std::optional KernelInfo; + std::optional KernelInfo; }; /// Class representing an HSA signal. Signals are used to define dependencies @@ -3188,9 +3188,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy { utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId)); if (!TargeTripleAndFeaturesOrError) return TargeTripleAndFeaturesOrError.takeError(); - return utils::isImageCompatibleWithEnv(Processor ? *Processor : "", - ElfOrErr->getPlatformFlags(), - *TargeTripleAndFeaturesOrError); + return offloading::amdgpu::isImageCompatibleWithEnv( + Processor ? *Processor : "", ElfOrErr->getPlatformFlags(), + *TargeTripleAndFeaturesOrError); } bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override { diff --git a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h index 58a3b5df00fac..0b6bc50ebf1d8 100644 --- a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h +++ b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h @@ -17,24 +17,13 @@ #include "omptarget.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Error.h" - -#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" -#include "llvm/BinaryFormat/ELF.h" -#include "llvm/BinaryFormat/MsgPackDocument.h" -#include "llvm/Support/MemoryBufferRef.h" -#include "llvm/Support/YAMLTraits.h" - -using namespace llvm::ELF; +#include "llvm/Frontend/Offloading/Utility.h" namespace llvm { namespace omp { namespace target { namespace plugin { namespace utils { - // The implicit arguments of COV5 AMDGPU kernels. struct AMDGPUImplicitArgsTy { uint32_t BlockCountX; @@ -55,259 +44,26 @@ struct AMDGPUImplicitArgsTyCOV4 { uint8_t Unused[56]; }; +/// Returns the size in bytes of the implicit arguments of AMDGPU kernels. +/// `Version` is the ELF ABI version, e.g. COV5. inline uint32_t getImplicitArgsSize(uint16_t Version) { return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5 ? sizeof(AMDGPUImplicitArgsTyCOV4) : sizeof(AMDGPUImplicitArgsTy); } -/// Check if an image is compatible with current system's environment. The -/// system environment is given as a 'target-id' which has the form: -/// -/// := ( ":" ( "+" | "-" ) )* -/// -/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any' -/// and is compatible with either '+' or '-'. The HSA runtime returns this -/// information using the target-id, while we use the ELF header to determine -/// these features. -inline bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags, - StringRef EnvTargetID) { - StringRef EnvArch = EnvTargetID.split(":").first; - - // Trivial check if the base processors match. - if (EnvArch != ImageArch) - return false; - - // Check if the image is requesting xnack on or off. - switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) { - case EF_AMDGPU_FEATURE_XNACK_OFF_V4: - // The image is 'xnack-' so the environment must be 'xnack-'. - if (!EnvTargetID.contains("xnack-")) - return false; - break; - case EF_AMDGPU_FEATURE_XNACK_ON_V4: - // The image is 'xnack+' so the environment must be 'xnack+'. - if (!EnvTargetID.contains("xnack+")) - return false; - break; - case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4: - case EF_AMDGPU_FEATURE_XNACK_ANY_V4: - default: - break; - } - - // Check if the image is requesting sramecc on or off. - switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) { - case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4: - // The image is 'sramecc-' so the environment must be 'sramecc-'. - if (!EnvTargetID.contains("sramecc-")) - return false; - break; - case EF_AMDGPU_FEATURE_SRAMECC_ON_V4: - // The image is 'sramecc+' so the environment must be 'sramecc+'. - if (!EnvTargetID.contains("sramecc+")) - return false; - break; - case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4: - case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4: - break; - } - - return true; -} - -struct KernelMetaDataTy { - uint64_t KernelObject; - uint32_t GroupSegmentList; - uint32_t PrivateSegmentSize; - uint32_t SGPRCount; - uint32_t VGPRCount; - uint32_t SGPRSpillCount; - uint32_t VGPRSpillCount; - uint32_t KernelSegmentSize; - uint32_t ExplicitArgumentCount; - uint32_t ImplicitArgumentCount; - uint32_t RequestedWorkgroupSize[3]; - uint32_t WorkgroupSizeHint[3]; - uint32_t WavefronSize; - uint32_t MaxFlatWorkgroupSize; -}; -namespace { - -/// Reads the AMDGPU specific per-kernel-metadata from an image. -class KernelInfoReader { -public: - KernelInfoReader(StringMap &KIM) : KernelInfoMap(KIM) {} - - /// Process ELF note to read AMDGPU metadata from respective information - /// fields. - Error processNote(const object::ELF64LE::Note &Note, size_t Align) { - if (Note.getName() != "AMDGPU") - return Error::success(); // We are not interested in other things - - assert(Note.getType() == ELF::NT_AMDGPU_METADATA && - "Parse AMDGPU MetaData"); - auto Desc = Note.getDesc(Align); - StringRef MsgPackString = - StringRef(reinterpret_cast(Desc.data()), Desc.size()); - msgpack::Document MsgPackDoc; - if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false)) - return Error::success(); - - AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true); - if (!Verifier.verify(MsgPackDoc.getRoot())) - return Error::success(); - - auto RootMap = MsgPackDoc.getRoot().getMap(true); - - if (auto Err = iterateAMDKernels(RootMap)) - return Err; - - return Error::success(); - } - -private: - /// Extracts the relevant information via simple string look-up in the msgpack - /// document elements. - Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V, - std::string &KernelName, - KernelMetaDataTy &KernelData) { - if (!V.first.isString()) - return Error::success(); - - const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) { - return DK.getString() == SK; - }; - - const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN, - uint32_t *Vals) { - assert(DN.isArray() && "MsgPack DocNode is an array node"); - auto DNA = DN.getArray(); - assert(DNA.size() == 3 && "ArrayNode has at most three elements"); - - int I = 0; - for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd; - ++DNABegin) { - Vals[I++] = DNABegin->getUInt(); - } - }; - - if (IsKey(V.first, ".name")) { - KernelName = V.second.toString(); - } else if (IsKey(V.first, ".sgpr_count")) { - KernelData.SGPRCount = V.second.getUInt(); - } else if (IsKey(V.first, ".sgpr_spill_count")) { - KernelData.SGPRSpillCount = V.second.getUInt(); - } else if (IsKey(V.first, ".vgpr_count")) { - KernelData.VGPRCount = V.second.getUInt(); - } else if (IsKey(V.first, ".vgpr_spill_count")) { - KernelData.VGPRSpillCount = V.second.getUInt(); - } else if (IsKey(V.first, ".private_segment_fixed_size")) { - KernelData.PrivateSegmentSize = V.second.getUInt(); - } else if (IsKey(V.first, ".group_segment_fixed_size")) { - KernelData.GroupSegmentList = V.second.getUInt(); - } else if (IsKey(V.first, ".reqd_workgroup_size")) { - GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize); - } else if (IsKey(V.first, ".workgroup_size_hint")) { - GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint); - } else if (IsKey(V.first, ".wavefront_size")) { - KernelData.WavefronSize = V.second.getUInt(); - } else if (IsKey(V.first, ".max_flat_workgroup_size")) { - KernelData.MaxFlatWorkgroupSize = V.second.getUInt(); - } - - return Error::success(); - } - - /// Get the "amdhsa.kernels" element from the msgpack Document - Expected getAMDKernelsArray(msgpack::MapDocNode &MDN) { - auto Res = MDN.find("amdhsa.kernels"); - if (Res == MDN.end()) - return createStringError(inconvertibleErrorCode(), - "Could not find amdhsa.kernels key"); - - auto Pair = *Res; - assert(Pair.second.isArray() && - "AMDGPU kernel entries are arrays of entries"); - - return Pair.second.getArray(); - } - - /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a - /// MapDocNode that either maps a string to a single value (most of them) or - /// to another array of things. Currently, we only handle the case that maps - /// to scalar value. - Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) { - KernelMetaDataTy KernelData; - std::string KernelName; - auto Entry = (*It).getMap(); - for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI) - if (auto Err = extractKernelData(*MI, KernelName, KernelData)) - return Err; - - KernelInfoMap.insert({KernelName, KernelData}); - return Error::success(); - } - - /// Go over the list of AMD kernels in the "amdhsa.kernels" entry - Error iterateAMDKernels(msgpack::MapDocNode &MDN) { - auto KernelsOrErr = getAMDKernelsArray(MDN); - if (auto Err = KernelsOrErr.takeError()) - return Err; - - auto KernelsArr = *KernelsOrErr; - for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) { - if (!It->isMap()) - continue; // we expect pairs - - // Obtain the value for the different entries. Each array entry is a - // MapDocNode - if (auto Err = generateKernelInfo(It)) - return Err; - } - return Error::success(); - } - - // Kernel names are the keys - StringMap &KernelInfoMap; -}; -} // namespace - /// Reads the AMDGPU specific metadata from the ELF file and propagates the /// KernelInfoMap -inline Error -readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer, - StringMap &KernelInfoMap, - uint16_t &ELFABIVersion) { - Error Err = Error::success(); // Used later as out-parameter - - auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer()); - if (auto Err = ELFOrError.takeError()) +inline Error readAMDGPUMetaDataFromImage( + MemoryBufferRef MemBuffer, + StringMap &KernelInfoMap, + uint16_t &ELFABIVersion) { + Error Err = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage( + MemBuffer, KernelInfoMap, ELFABIVersion); + if (!Err) return Err; - - const object::ELF64LEFile ELFObj = ELFOrError.get(); - ArrayRef Sections = cantFail(ELFObj.sections()); - KernelInfoReader Reader(KernelInfoMap); - - // Read the code object version from ELF image header - auto Header = ELFObj.getHeader(); - ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]); DP("ELFABIVERSION Version: %u\n", ELFABIVersion); - - for (const auto &S : Sections) { - if (S.sh_type != ELF::SHT_NOTE) - continue; - - for (const auto N : ELFObj.notes(S, Err)) { - if (Err) - return Err; - // Fills the KernelInfoTabel entries in the reader - if ((Err = Reader.processNote(N, S.sh_addralign))) - return Err; - } - } - - return Error::success(); + return Err; } } // namespace utils