[LLVM][DWARF] Add support for monolithic types in .debug_names #68131

ayermolo · 2023-10-03T17:25:46Z

Added support for Type Units in monolithic DWARF in .debug_names.

llvmbot · 2023-10-03T17:26:52Z

@llvm/pr-subscribers-debuginfo

Changes

[LLVM][DWARF] Add support for monolithic types in .debug_names

Patch is 55.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68131.diff

21 Files Affected:

(modified) llvm/include/llvm/CodeGen/AccelTable.h (+28-7)
(modified) llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h (+24)
(modified) llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp (+112-60)
(modified) llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (+23-8)
(modified) llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h (+11)
(modified) llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp (+4)
(modified) llvm/lib/CodeGen/AsmPrinter/DwarfFile.h (+14)
(modified) llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp (+4)
(modified) llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h (+9)
(modified) llvm/lib/DWARFLinker/DWARFStreamer.cpp (+11-2)
(modified) llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp (+14-4)
(modified) llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp (+1-1)
(modified) llvm/lib/DebugInfo/DWARF/DWARFContext.cpp (+44-4)
(modified) llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp (+5-2)
(modified) llvm/test/DebugInfo/X86/accel-tables-dwarf5.ll (-1)
(modified) llvm/test/DebugInfo/X86/debug-names-dwarf64.ll (+12-10)
(added) llvm/test/DebugInfo/X86/debug-names-types-monolithic.ll (+168)
(added) llvm/test/DebugInfo/X86/debug-names-types-split.ll (+57)
(modified) llvm/test/DebugInfo/X86/dwarfdump-debug-names.s (+20-16)
(modified) llvm/test/tools/llvm-dwarfdump/X86/debug-names-misaligned.s (+1-1)
(modified) llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp (+21)

diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h
index d521b31e3d16ab4..537f6a4d42bef0e 100644
--- a/llvm/include/llvm/CodeGen/AccelTable.h
+++ b/llvm/include/llvm/CodeGen/AccelTable.h
@@ -106,6 +106,7 @@ namespace llvm {
 class AsmPrinter;
 class DwarfCompileUnit;
 class DwarfDebug;
+class DwarfTypeUnit;
 class MCSymbol;
 class raw_ostream;
 
@@ -197,6 +198,9 @@ template <typename DataT> class AccelTable : public AccelTableBase {
 
   template <typename... Types>
   void addName(DwarfStringPoolEntryRef Name, Types &&... Args);
+  void clear() { Entries.clear(); }
+  void addEntries(AccelTable<DataT> &Table);
+  const StringEntries getEntries() const { return Entries; }
 };
 
 template <typename AccelTableDataT>
@@ -215,6 +219,16 @@ void AccelTable<AccelTableDataT>::addName(DwarfStringPoolEntryRef Name,
                           AccelTableDataT(std::forward<Types>(Args)...));
 }
 
+template <typename AccelTableDataT>
+void AccelTable<AccelTableDataT>::addEntries(
+    AccelTable<AccelTableDataT> &Table) {
+  for (auto &Entry : Table.getEntries()) {
+    for (AccelTableData *Value : Entry.second.Values)
+      addName(Entry.second.Name,
+              static_cast<AccelTableDataT *>(Value)->getDie());
+  }
+}
+
 /// A base class for different implementations of Data classes for Apple
 /// Accelerator Tables. The columns in the table are defined by the static Atoms
 /// variable defined on the subclasses.
@@ -250,6 +264,10 @@ class AppleAccelTableData : public AccelTableData {
 /// emitDWARF5AccelTable function.
 class DWARF5AccelTableData : public AccelTableData {
 public:
+  struct AttributeEncoding {
+    dwarf::Index Index;
+    dwarf::Form Form;
+  };
   static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); }
 
   DWARF5AccelTableData(const DIE &Die) : Die(Die) {}
@@ -309,17 +327,20 @@ void emitAppleAccelTable(AsmPrinter *Asm, AccelTable<DataT> &Contents,
 void emitDWARF5AccelTable(AsmPrinter *Asm,
                           AccelTable<DWARF5AccelTableData> &Contents,
                           const DwarfDebug &DD,
-                          ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs);
-
+                          ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs,
+                          ArrayRef<std::unique_ptr<DwarfTypeUnit>> TUs);
+using GetIndexForEntryReturnType =
+    std::optional<std::pair<unsigned, DWARF5AccelTableData::AttributeEncoding>>;
 /// Emit a DWARFv5 Accelerator Table consisting of entries in the specified
 /// AccelTable. The \p CUs contains either symbols keeping offsets to the
 /// start of compilation unit, either offsets to the start of compilation
 /// unit themselves.
-void emitDWARF5AccelTable(
-    AsmPrinter *Asm, AccelTable<DWARF5AccelTableStaticData> &Contents,
-    ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
-    llvm::function_ref<unsigned(const DWARF5AccelTableStaticData &)>
-        getCUIndexForEntry);
+void emitDWARF5AccelTable(AsmPrinter *Asm,
+                          AccelTable<DWARF5AccelTableStaticData> &Contents,
+                          ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
+                          llvm::function_ref<GetIndexForEntryReturnType(
+                              const DWARF5AccelTableStaticData &)>
+                              getIndexForEntry);
 
 /// Accelerator table data implementation for simple Apple accelerator tables
 /// with just a DIE reference.
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 4bd8394e6b4ec82..f05106522c4b554 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -125,6 +125,12 @@ class DWARFContext : public DIContext {
   DWARFUnitVector &getDWOUnits(bool Lazy = false);
 
   std::unique_ptr<const DWARFObject> DObj;
+  /// Can be optionally set by tools that work with .dwo/.dwp files to reference
+  /// main binary debug information. Usefull for accessing .debug_ranges and
+  /// .debug_addr section.
+  std::unique_ptr<const DWARFObject> MainBinaryDObj = nullptr;
+  /// DWARFContext for main binary.
+  std::unique_ptr<DWARFContext> MainBinaryDICtx = nullptr;
 
   // When set parses debug_info.dwo/debug_abbrev.dwo manually and populates CU
   // Index, and TU Index for DWARF5.
@@ -145,6 +151,13 @@ class DWARFContext : public DIContext {
 
   const DWARFObject &getDWARFObj() const { return *DObj; }
 
+  const DWARFObject *getMainBinaryDWARFObj() const {
+    return MainBinaryDObj.get();
+  }
+  const DWARFContext *getDWARFContextMainBinary() const {
+    return MainBinaryDICtx.get();
+  }
+
   static bool classof(const DIContext *DICtx) {
     return DICtx->getKind() == CK_DWARF;
   }
@@ -478,6 +491,17 @@ class DWARFContext : public DIContext {
   /// manually only for DWARF5.
   void setParseCUTUIndexManually(bool PCUTU) { ParseCUTUIndexManually = PCUTU; }
 
+  /// Sets the object corresponding to the main binary to which the .dwo/.dwp
+  /// file belongs.
+  void setMainBinaryObjAndCreateContext(
+      const object::ObjectFile &Obj,
+      ProcessDebugRelocations RelocAction = ProcessDebugRelocations::Process,
+      const LoadedObjectInfo *L = nullptr,
+      std::function<void(Error)> RecoverableErrorHandler =
+          WithColor::defaultErrorHandler,
+      std::function<void(Error)> WarningHandler =
+          WithColor::defaultWarningHandler);
+
 private:
   void addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, DWARFDie Die,
                        std::vector<DILocal> &Result);
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 8f936037d132537..5510932e96d2598 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -13,7 +13,6 @@
 #include "llvm/CodeGen/AccelTable.h"
 #include "DwarfCompileUnit.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/AsmPrinter.h"
@@ -200,32 +199,30 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
     uint32_t AugmentationStringSize = sizeof(AugmentationString);
     char AugmentationString[8] = {'L', 'L', 'V', 'M', '0', '7', '0', '0'};
 
-    Header(uint32_t CompUnitCount, uint32_t BucketCount, uint32_t NameCount)
-        : CompUnitCount(CompUnitCount), BucketCount(BucketCount),
-          NameCount(NameCount) {}
+    Header(uint32_t CompUnitCount, uint32_t LocalTypeUnitCount,
+           uint32_t BucketCount, uint32_t NameCount)
+        : CompUnitCount(CompUnitCount), LocalTypeUnitCount(LocalTypeUnitCount),
+          BucketCount(BucketCount), NameCount(NameCount) {}
 
     void emit(Dwarf5AccelTableWriter &Ctx);
   };
-  struct AttributeEncoding {
-    dwarf::Index Index;
-    dwarf::Form Form;
-  };
 
   Header Header;
-  DenseMap<uint32_t, SmallVector<AttributeEncoding, 2>> Abbreviations;
+  DenseMap<uint32_t, SmallVector<DWARF5AccelTableData::AttributeEncoding, 2>>
+      Abbreviations;
   ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits;
-  llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry;
+  ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits;
+  llvm::function_ref<GetIndexForEntryReturnType(const DataT &)>
+      getIndexForEntry;
   MCSymbol *ContributionEnd = nullptr;
   MCSymbol *AbbrevStart = Asm->createTempSymbol("names_abbrev_start");
   MCSymbol *AbbrevEnd = Asm->createTempSymbol("names_abbrev_end");
   MCSymbol *EntryPool = Asm->createTempSymbol("names_entries");
 
-  DenseSet<uint32_t> getUniqueTags() const;
-
-  // Right now, we emit uniform attributes for all tags.
-  SmallVector<AttributeEncoding, 2> getUniformAttributes() const;
+  void populateAbbrevsMap();
 
   void emitCUList() const;
+  void emitTUList() const;
   void emitBuckets() const;
   void emitStringOffsets() const;
   void emitAbbrevs() const;
@@ -236,7 +233,9 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
   Dwarf5AccelTableWriter(
       AsmPrinter *Asm, const AccelTableBase &Contents,
       ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits,
-      llvm::function_ref<unsigned(const DataT &)> GetCUIndexForEntry);
+      ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits,
+      llvm::function_ref<GetIndexForEntryReturnType(const DataT &)>
+          getIndexForEntry);
 
   void emit();
 };
@@ -388,31 +387,39 @@ void Dwarf5AccelTableWriter<DataT>::Header::emit(Dwarf5AccelTableWriter &Ctx) {
   Asm->OutStreamer->emitBytes({AugmentationString, AugmentationStringSize});
 }
 
+static uint32_t constexpr LowerBitSize = dwarf::DW_IDX_type_hash;
+static uint32_t getTagFromAbbreviationTag(const uint32_t AbbrvTag) {
+  return AbbrvTag >> LowerBitSize;
+}
+static uint32_t
+constructAbbreviationTag(const unsigned Tag,
+                         const GetIndexForEntryReturnType &EntryRet) {
+  uint32_t AbbrvTag = 0;
+  if (EntryRet)
+    AbbrvTag |= 1 << EntryRet->second.Index;
+  AbbrvTag |= 1 << dwarf::DW_IDX_die_offset;
+  AbbrvTag |= Tag << LowerBitSize;
+  return AbbrvTag;
+}
 template <typename DataT>
-DenseSet<uint32_t> Dwarf5AccelTableWriter<DataT>::getUniqueTags() const {
-  DenseSet<uint32_t> UniqueTags;
+void Dwarf5AccelTableWriter<DataT>::populateAbbrevsMap() {
   for (auto &Bucket : Contents.getBuckets()) {
     for (auto *Hash : Bucket) {
       for (auto *Value : Hash->Values) {
+        GetIndexForEntryReturnType EntryRet =
+            getIndexForEntry(*static_cast<const DataT *>(Value));
         unsigned Tag = static_cast<const DataT *>(Value)->getDieTag();
-        UniqueTags.insert(Tag);
+        uint32_t AbbrvTag = constructAbbreviationTag(Tag, EntryRet);
+        if (Abbreviations.count(AbbrvTag) == 0) {
+          SmallVector<DWARF5AccelTableData::AttributeEncoding, 2> UA;
+          if (EntryRet)
+            UA.push_back(EntryRet->second);
+          UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
+          Abbreviations.try_emplace(AbbrvTag, UA);
+        }
       }
     }
   }
-  return UniqueTags;
-}
-
-template <typename DataT>
-SmallVector<typename Dwarf5AccelTableWriter<DataT>::AttributeEncoding, 2>
-Dwarf5AccelTableWriter<DataT>::getUniformAttributes() const {
-  SmallVector<AttributeEncoding, 2> UA;
-  if (CompUnits.size() > 1) {
-    size_t LargestCUIndex = CompUnits.size() - 1;
-    dwarf::Form Form = DIEInteger::BestForm(/*IsSigned*/ false, LargestCUIndex);
-    UA.push_back({dwarf::DW_IDX_compile_unit, Form});
-  }
-  UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
-  return UA;
 }
 
 template <typename DataT>
@@ -426,6 +433,17 @@ void Dwarf5AccelTableWriter<DataT>::emitCUList() const {
   }
 }
 
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::emitTUList() const {
+  for (const auto &TU : enumerate(TypeUnits)) {
+    Asm->OutStreamer->AddComment("Type unit " + Twine(TU.index()));
+    if (std::holds_alternative<MCSymbol *>(TU.value()))
+      Asm->emitDwarfSymbolReference(std::get<MCSymbol *>(TU.value()));
+    else
+      Asm->emitDwarfLengthOrOffset(std::get<uint64_t>(TU.value()));
+  }
+}
+
 template <typename DataT>
 void Dwarf5AccelTableWriter<DataT>::emitBuckets() const {
   uint32_t Index = 1;
@@ -453,10 +471,11 @@ void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const {
   Asm->OutStreamer->emitLabel(AbbrevStart);
   for (const auto &Abbrev : Abbreviations) {
     Asm->OutStreamer->AddComment("Abbrev code");
-    assert(Abbrev.first != 0);
-    Asm->emitULEB128(Abbrev.first);
-    Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.first));
+    uint32_t Tag = getTagFromAbbreviationTag(Abbrev.first);
+    assert(Tag != 0);
     Asm->emitULEB128(Abbrev.first);
+    Asm->OutStreamer->AddComment(dwarf::TagString(Tag));
+    Asm->emitULEB128(Tag);
     for (const auto &AttrEnc : Abbrev.second) {
       Asm->emitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data());
       Asm->emitULEB128(AttrEnc.Form,
@@ -471,16 +490,21 @@ void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const {
 
 template <typename DataT>
 void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const {
-  auto AbbrevIt = Abbreviations.find(Entry.getDieTag());
+  GetIndexForEntryReturnType EntryRet = getIndexForEntry(Entry);
+  uint32_t AbbrvTag = constructAbbreviationTag(Entry.getDieTag(), EntryRet);
+  auto AbbrevIt = Abbreviations.find(AbbrvTag);
   assert(AbbrevIt != Abbreviations.end() &&
          "Why wasn't this abbrev generated?");
-
+  assert(getTagFromAbbreviationTag(AbbrevIt->first) == Entry.getDieTag() &&
+         "Invalid Tag");
   Asm->emitULEB128(AbbrevIt->first, "Abbreviation code");
+
   for (const auto &AttrEnc : AbbrevIt->second) {
     Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index));
     switch (AttrEnc.Index) {
-    case dwarf::DW_IDX_compile_unit: {
-      DIEInteger ID(getCUIndexForEntry(Entry));
+    case dwarf::DW_IDX_compile_unit:
+    case dwarf::DW_IDX_type_unit: {
+      DIEInteger ID(EntryRet->first);
       ID.emitValue(Asm, AttrEnc.Form);
       break;
     }
@@ -512,22 +536,21 @@ template <typename DataT>
 Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter(
     AsmPrinter *Asm, const AccelTableBase &Contents,
     ArrayRef<std::variant<MCSymbol *, uint64_t>> CompUnits,
-    llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry)
+    ArrayRef<std::variant<MCSymbol *, uint64_t>> TypeUnits,
+    llvm::function_ref<GetIndexForEntryReturnType(const DataT &)>
+        getIndexForEntry)
     : AccelTableWriter(Asm, Contents, false),
-      Header(CompUnits.size(), Contents.getBucketCount(),
+      Header(CompUnits.size(), TypeUnits.size(), Contents.getBucketCount(),
              Contents.getUniqueNameCount()),
-      CompUnits(CompUnits), getCUIndexForEntry(std::move(getCUIndexForEntry)) {
-  DenseSet<uint32_t> UniqueTags = getUniqueTags();
-  SmallVector<AttributeEncoding, 2> UniformAttributes = getUniformAttributes();
-
-  Abbreviations.reserve(UniqueTags.size());
-  for (uint32_t Tag : UniqueTags)
-    Abbreviations.try_emplace(Tag, UniformAttributes);
+      CompUnits(CompUnits), TypeUnits(TypeUnits),
+      getIndexForEntry(std::move(getIndexForEntry)) {
+  populateAbbrevsMap();
 }
 
 template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() {
   Header.emit(*this);
   emitCUList();
+  emitTUList();
   emitBuckets();
   emitHashes();
   emitStringOffsets();
@@ -545,12 +568,17 @@ void llvm::emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents,
   AppleAccelTableWriter(Asm, Contents, Atoms, SecBegin).emit();
 }
 
-void llvm::emitDWARF5AccelTable(
-    AsmPrinter *Asm, AccelTable<DWARF5AccelTableData> &Contents,
-    const DwarfDebug &DD, ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs) {
+void llvm::emitDWARF5AccelTable(AsmPrinter *Asm,
+                                AccelTable<DWARF5AccelTableData> &Contents,
+                                const DwarfDebug &DD,
+                                ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs,
+                                ArrayRef<std::unique_ptr<DwarfTypeUnit>> TUs) {
   std::vector<std::variant<MCSymbol *, uint64_t>> CompUnits;
+  std::vector<std::variant<MCSymbol *, uint64_t>> TypeUnits;
   SmallVector<unsigned, 1> CUIndex(CUs.size());
-  int Count = 0;
+  DenseMap<const DIE *, unsigned> TUIndex(TUs.size());
+  int CUCount = 0;
+  int TUCount = 0;
   for (const auto &CU : enumerate(CUs)) {
     switch (CU.value()->getCUNode()->getNameTableKind()) {
     case DICompileUnit::DebugNameTableKind::Default:
@@ -559,13 +587,25 @@ void llvm::emitDWARF5AccelTable(
     default:
       continue;
     }
-    CUIndex[CU.index()] = Count++;
+    CUIndex[CU.index()] = CUCount++;
     assert(CU.index() == CU.value()->getUniqueID());
     const DwarfCompileUnit *MainCU =
         DD.useSplitDwarf() ? CU.value()->getSkeleton() : CU.value().get();
     CompUnits.push_back(MainCU->getLabelBegin());
   }
 
+  for (const auto &TU : enumerate(TUs)) {
+    switch (TU.value()->getCUNode()->getNameTableKind()) {
+    case DICompileUnit::DebugNameTableKind::Default:
+      break;
+    default:
+      continue;
+    }
+    TUIndex[&TU.value()->getUnitDie()] = TUCount++;
+    const DwarfTypeUnit *MainTU = TU.value().get();
+    TypeUnits.push_back(MainTU->getLabelBegin());
+  }
+
   if (CompUnits.empty())
     return;
 
@@ -573,11 +613,21 @@ void llvm::emitDWARF5AccelTable(
       Asm->getObjFileLowering().getDwarfDebugNamesSection());
 
   Contents.finalize(Asm, "names");
+  dwarf::Form CUIndexForm =
+      DIEInteger::BestForm(/*IsSigned*/ false, CompUnits.size() - 1);
+  dwarf::Form TUIndexForm =
+      DIEInteger::BestForm(/*IsSigned*/ false, TypeUnits.size() - 1);
   Dwarf5AccelTableWriter<DWARF5AccelTableData>(
-      Asm, Contents, CompUnits,
-      [&](const DWARF5AccelTableData &Entry) {
+      Asm, Contents, CompUnits, TypeUnits,
+      [&](const DWARF5AccelTableData &Entry) -> GetIndexForEntryReturnType {
         const DIE *CUDie = Entry.getDie().getUnitDie();
-        return CUIndex[DD.lookupCU(CUDie)->getUniqueID()];
+        GetIndexForEntryReturnType Index = std::nullopt;
+        if (CUDie->getTag() == dwarf::DW_TAG_type_unit)
+          Index = {TUIndex[CUDie], {dwarf::DW_IDX_type_unit, TUIndexForm}};
+        else if (CUIndex.size() > 1)
+          Index = {CUIndex[DD.lookupCU(CUDie)->getUniqueID()],
+                   {dwarf::DW_IDX_compile_unit, CUIndexForm}};
+        return Index;
       })
       .emit();
 }
@@ -585,11 +635,13 @@ void llvm::emitDWARF5AccelTable(
 void llvm::emitDWARF5AccelTable(
     AsmPrinter *Asm, AccelTable<DWARF5AccelTableStaticData> &Contents,
     ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
-    llvm::function_ref<unsigned(const DWARF5AccelTableStaticData &)>
-        getCUIndexForEntry) {
+    llvm::function_ref<
+        GetIndexForEntryReturnType(const DWARF5AccelTableStaticData &)>
+        getIndexForEntry) {
+  std::vector<std::variant<MCSymbol *, uint64_t>> TypeUnits;
   Contents.finalize(Asm, "names");
-  Dwarf5AccelTableWriter<DWARF5AccelTableStaticData>(Asm, Contents, CUs,
-                                                     getCUIndexForEntry)
+  Dwarf5AccelTableWriter<DWARF5AccelTableStaticData>(
+      Asm, Contents, CUs, TypeUnits, getIndexForEntry)
       .emit();
 }
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index ee2ab71ad28e47f..8a680dd9c6976e0 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -305,6 +305,7 @@ void Loc::MMI::addFrameIndexExpr(const DIExpression *Expr, int FI) {
 
 static AccelTableKind computeAccelTableKind(unsigned DwarfVersion,
                                             bool GenerateTypeUnits,
+                                            bool HasSplitDwarf,
                                             DebuggerKind Tuning,
                                             const Triple &TT) {
   // Honor an explicit request.
@@ -312,7 +313,8 @@ static AccelTableKind computeAccelTableKind(unsigned DwarfVersion,
     return AccelTables;
 
   // Accelerator tables with type units are currently not supported.
-  if (GenerateTypeUnits)
+  if (GenerateTypeUnits &&
+      (DwarfVersion < 5 || HasSplitDwarf || !TT.isOSBinFormatELF()))
     return AccelTableKind::None;
 
   // Accelerator tables get emitted if targetting DWARF v5 or LLDB.  DWARF v5
@@ -325,6 +327,9 @@ static AccelTableKind computeAccelTableKind(unsigned DwarfVersion,
                                    : AccelTableKind::Dwarf;
   return AccelTableKind::None;
 }
+void DwarfDebug::addTypeUnit(std::unique_ptr<DwarfTypeUnit> U) {
+  InfoHolder.addTypeUnit(std::move(U));
+}
 
 DwarfDebug::DwarfDebug(AsmPrinter *A)
     : DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
@@ -400,8 +405,9 @@ DwarfDebug::DwarfDebug(AsmPrinter *A)
                        A->TM.getTargetTriple().isOSBinFormatWasm()) &&
                       GenerateDwarfTypeUnits;
 
-  TheAccelTableKind = computeAccelTableKind(
-      DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple());
+  TheAccelTableKind =
+      computeAccelTableKind(DwarfVersion, GenerateTypeUnits, HasSplitDwarf,
+                            DebuggerTuning, A->TM.getTargetTriple());
 
   // Work around a GDB bug. GDB doesn't support the standard opcode;
   // SCE d...
[truncated]

dwblaikie

Given the concerns around memory usage of keeping the TUs alive - maybe we should
discuss that/consider those issues before getting into the weeds of the mechanics of this review (I left a few more comments before I realized maybe they won't make sense anyway, depending on the direction)?

dwblaikie · 2023-10-04T20:38:32Z

llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp

    for (auto &TU : TypeUnitsToAdd) {
      InfoHolder.computeSizeAndOffsetsForUnit(TU.first.get());
      InfoHolder.emitUnit(TU.first.get(), useSplitDwarf());
+      if (getDwarfVersion() >= 5 &&
+          getAccelTableKind() == AccelTableKind::Dwarf) {
+        addTypeUnit(std::move(TU.first));


If I recall correctly... it's important that type units get discarded once emitted, for compiler memory use reasons. So keeping them around until we emit the name table might not be feasible.

Have you/can you measure the memory usage increase this change causes for some practical examples?

If I recall correctly... it's important that type units get discarded once emitted, for compiler memory use reasons. So keeping them around until we emit the name table might not be feasible.

Have you/can you measure the memory usage increase this change causes for some practical examples?

I have not. Were you thinking for one invocation of clang, or to build whole project?
What do you think a reasonable number of type units pre compilation modules are?

Won't memory foot print still be less then when we do monolithic dwarf?

It wouldn't be, no - type units have overhead necessary to make them standalone units, so the overhead is higher (& in any case, even if it isn't higher - it's still a lost opportunity/regression to substantially increase the memory overhead when using type units)

I have not. Were you thinking for one invocation of clang, or to build whole project?
What do you think a reasonable number of type units pre compilation modules are?

Given I expect this to be a fairly clear non-starter, I'd probably test something pretty simple like, say, Clang's Sema.cpp - it's a bit of a big/rough thing to compile, probably has enough types to show up a significant regression I'd expect would be caused by keeping all the TUs alive to the end of the compilation.

Yeah, that seems significant enough that it'd be worth preserving the behavior if reasonably possible.

So this work might end up looking more like a patch to change the way the current index works - to not depend on DIEs having a long lifetime (replacing DIE pointers with offsets - I guess maybe a union of the two, and we go through some lowering phase where the DIE pointer is read, the offset is retrieved (once known) and then the DIE pointer is replaced with the offset value) - just a pure refactor, no functional change. Then this patch could benefit from that restructure & avoid the memory regression/wouldn't need to extend the lifetime of type units.

Sound good?

Yeah something like that. As we briefly discussed during llvm dev meeting just carry relative offset and tag info to accelerator table.
I will start looking into it on Monday, and once that is done I'll post a new PR. This time I know what button not-to push to merge it by mistake. lol

For LLD patch talking to @MaskRay seems like I was on a right path trying to add more logic to InputSection::relocateNonAlloc. Although I am still not clear how to figure out if TU was comdat from a Symbol.

Hmm - I still wouldn't've thought you'd need to check if the TU is a comdat. That there'd already be existing code that's resolving/tombstoning - and it's just a question of picking a different tombstone if the relocation happens to be in the .debug_info section. Nothing about checking if it's comdat, or other details.

Maybe I am not looking in the right place. For relocations in .debug_names into .debug_info it doesn't hit any of the tombstone code in that function. I'll circle back to it, after llvm part.

Ah, perhaps there's a different codepath since the debug info section isn't SHF_ALLOC. Sorry, might be more involved.

dwblaikie · 2023-10-04T20:41:55Z

llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp

    assert(CU.index() == CU.value()->getUniqueID());
    const DwarfCompileUnit *MainCU =
        DD.useSplitDwarf() ? CU.value()->getSkeleton() : CU.value().get();
    CompUnits.push_back(MainCU->getLabelBegin());
  }

+  for (const auto &TU : enumerate(TUs)) {


Why is this using enumerate if it's only using the values anyway? Perhaps this could iterate over TUs directly instead?

I was following the example of CUs iteration to try to keep coding style consistent.

This isn't a stylistic thing in this case - the CU iteration needed indexes, so it used enumerate, whereas the TU iteration doesn't need them, so it shouldn't use enumerate, I think.

(but, again, happy to wait on resolving a bunch of this stuff until we better understand the general approach/memory concerns, etc)

dwblaikie · 2023-10-04T20:42:47Z

llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp

+    switch (TU.value()->getCUNode()->getNameTableKind()) {
+    case DICompileUnit::DebugNameTableKind::Default:
+      break;
+    default:
+      continue;
+    }


Seems a bit convoluted (unless there's an expectation to add more here) compared to:

if (TU.value()->getCUNode()->getNameTableKind() != DICompileUnit::DebugNameTableKind::Default) continue;

dwblaikie · 2023-10-04T20:44:31Z

llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp

-      Asm, Contents, CompUnits,
-      [&](const DWARF5AccelTableData &Entry) {
+      Asm, Contents, CompUnits, TypeUnits,
+      [&](const DWARF5AccelTableData &Entry) -> GetIndexForEntryReturnType {
        const DIE *CUDie = Entry.getDie().getUnitDie();


CUDie might be a misleading name now that this might be a TUDie instead - UnitDie?

Summary: Adding support for Type Units in monolithic DWARF in .debug_names.

github-actions · 2023-10-05T18:08:00Z

✅ With the latest revision this PR passed the C/C++ code formatter.

ayermolo · 2023-10-05T18:25:06Z

ugh, committed by mistake, and revert button doesn't work. :/ Brings to github error page.

and getting access denied error when trying to push revert.

git push https://github.com/llvm/llvm-project.git HEAD:main
remote: Permission to llvm/llvm-project.git denied to ayermolo.
fatal: unable to access 'https://github.com/llvm/llvm-project.git/': The requested URL returned error: 403

dwblaikie · 2023-10-05T18:45:03Z

you can do the revert directly/yourself? (git revert blah git push - same way we would've done before we were using PRs?)?

ayermolo · 2023-10-05T18:46:39Z

you can do the revert directly/yourself? (git revert blah git push - same way we would've done before we were using PRs?)?

I tried that. I get permission error:
git revert 9bbd2bf
git push https://github.com/llvm/llvm-project.git HEAD:main
remote: Permission to llvm/llvm-project.git denied to ayermolo.
fatal: unable to access 'https://github.com/llvm/llvm-project.git/': The requested URL returned error: 403

Seems like my ssh key no longer works for some reason (it keeps asking for username/password), and with token I get the above.

#68131)" This reverts commit 9bbd2bf. Accidental commit: #68131 (comment)

nico · 2023-10-05T18:47:58Z

I reverted it for you in f320065

(The commit broke tests, but reading comments above that's probably expected: http://45.33.8.238/linux/120062/step_12.txt)

ayermolo · 2023-10-05T18:48:23Z

I reverted it for you in f320065

(The commit broke tests, but reading comments above that's probably expected: http://45.33.8.238/linux/120062/step_12.txt)

Thanks

ayermolo · 2023-10-05T21:07:04Z

llvm-dwarfdump part:
#68353

dwblaikie · 2023-10-11T22:27:28Z

http://reviews.llvm.org/D17118 - was the change with documented benefits that I hope we don't regress with this index change

dwblaikie · 2023-10-11T22:30:59Z

@pcc we're a bit concerned this patch might regress the improvement you made in D17118 - is there any chance you could run this against a similar workload? @ayermolo's numbers don't seem to indicate any real change & so maybe we regressed the D17118 improvement already previously?

dwblaikie · 2023-10-16T18:04:41Z

(not sure where it was recorded - maybe just in-person at LLVM dev)

@ayermolo did manage to find some data showing maybe a 10% memory regression for an IR->object compilation (where the backend memory usage isn't overshadowed by the frontend's peak) & I think is looking into it.

ayermolo · 2023-10-16T18:09:49Z

(not sure where it was recorded - maybe just in-person at LLVM dev)

@ayermolo did manage to find some data showing maybe a 10% memory regression for an IR->object compilation (where the backend memory usage isn't overshadowed by the frontend's peak) & I think is looking into it.

Yep. It's in the one of the threads in this PR. github is just hiding it under "load more".
Plan is to re-factor not to rely on DIE post PR, pot that, and then update this code.

dwblaikie · 2023-10-16T18:23:06Z

(not sure where it was recorded - maybe just in-person at LLVM dev)
@ayermolo did manage to find some data showing maybe a 10% memory regression for an IR->object compilation (where the backend memory usage isn't overshadowed by the frontend's peak) & I think is looking into it.

Yep. It's in the one of the threads in this PR. github is just hiding it under "load more". Plan is to re-factor not to rely on DIE post PR, pot that, and then update this code.

Not to duplicate conversations all over the place, but another thought I've had in other threads:

What if .debug_names entries referencing type units didn't have a DW_IDX_die_offset at all & relied on the type unit header to reference the type in the type unit? (is there anything else in a type unit we could be referencing? I don't think so... )

dwblaikie · 2023-10-16T18:23:41Z

(this would simplify the implementation/might be relevant to your work now and would mean no need to fix/change the indexes in the DWP)

ayermolo · 2023-10-16T18:27:24Z

Not sure, how tools would find the correct die. I would think there is some dupilcation?
I asked our internal LLDB experts to weight in. :D *cough @clayborg

dwblaikie · 2023-10-16T18:38:28Z

Not sure, how tools would find the correct die.

If it's only the type DIE - the header for the type unit has the offset to the type DIE.

clayborg · 2023-10-16T23:35:46Z

(not sure where it was recorded - maybe just in-person at LLVM dev)
@ayermolo did manage to find some data showing maybe a 10% memory regression for an IR->object compilation (where the backend memory usage isn't overshadowed by the frontend's peak) & I think is looking into it.

Yep. It's in the one of the threads in this PR. github is just hiding it under "load more". Plan is to re-factor not to rely on DIE post PR, pot that, and then update this code.

Not to duplicate conversations all over the place, but another thought I've had in other threads:

What if .debug_names entries referencing type units didn't have a DW_IDX_die_offset at all & relied on the type unit header to reference the type in the type unit? (is there anything else in a type unit we could be referencing? I don't think so... )

LLDB really wants all types indexed in the type unit, not just the main type. STL classes have many internal typedef types contained in the type itself ("iterator", "const_iterator", "size_type", "pointer_type", etc) and LLDB would like to be able to use these types in expressions. If we went with the above solution where we just index the type unit type only, then we would only be able to use these contained types if code from a CU needed to reference any of these types and had a copy in the CU that was indexed.

So I would vote for a complete index of the TU, or don't emit anything at all for the TU and LLDB will manually index it.

LLDB currently tracks which CUs and TUs have a .debug_names entry. For any CUs or TUs that don't have index entries, then we will manually index only the CUs and TUs that are not in any .debug_names. We needed to do this because many times you have mixed DWARF4 and DWARF5 object files that get compiled into the final executable.

ayermolo · 2023-10-17T23:56:08Z

refactor: #69399

MaskRay · 2023-11-06T00:25:51Z

llvm/test/DebugInfo/X86/debug-names-types-split.ll

+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{i32 7, !"frame-pointer", i32 2}
+!9 = !{!"clang version 18.0.0 (ssh://git.vip.facebook.com/data/gitrepos/osmeta/external/llvm-project 680deb27e25976d9b74ad7b48ec5cb96be0e44b6)"}


You may consider -DLLVM_APPEND_VC_REV=OFF in the future :)

Thanks! learned something new. Thought I removed those manually.

llvmbot added the debuginfo label Oct 3, 2023

ayermolo force-pushed the typesDebugNames branch from cae691e to fa8e5db Compare October 3, 2023 17:27

ayermolo requested review from dwblaikie and cmtice October 3, 2023 17:28

dwblaikie reviewed Oct 4, 2023

View reviewed changes

[LLVM][DWARF] Add support for monolithic types in .debug_names

ad45044

Summary: Adding support for Type Units in monolithic DWARF in .debug_names.

ayermolo force-pushed the typesDebugNames branch from fa8e5db to ad45044 Compare October 5, 2023 17:56

fixup! [LLVM][DWARF] Add support for monolithic types in .debug_names

55a04fb

ayermolo merged commit 9bbd2bf into llvm:main Oct 5, 2023
2 of 3 checks passed

nico added a commit that referenced this pull request Oct 5, 2023

Revert "[LLVM][DWARF] Add support for monolithic types in .debug_names (

f320065

#68131)" This reverts commit 9bbd2bf. Accidental commit: #68131 (comment)

stepthomas mentioned this pull request Oct 10, 2023

AMDGPU stepthomas atomic csub no rtn forms ver2 stepthomas/llvm-project#1

Closed

ayermolo deleted the typesDebugNames branch October 27, 2023 22:01

MaskRay reviewed Nov 6, 2023

View reviewed changes

MaskRay mentioned this pull request Nov 6, 2023

[LLD] Tombstone LocalTU entry in .debug_names #70701

Closed

[LLVM][DWARF] Add support for monolithic types in .debug_names #68131

[LLVM][DWARF] Add support for monolithic types in .debug_names #68131

Conversation

ayermolo commented Oct 3, 2023 • edited Loading

llvmbot commented Oct 3, 2023

dwblaikie left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

github-actions bot commented Oct 5, 2023 • edited Loading

ayermolo commented Oct 5, 2023 • edited Loading

dwblaikie commented Oct 5, 2023

ayermolo commented Oct 5, 2023 • edited Loading

nico commented Oct 5, 2023

ayermolo commented Oct 5, 2023

ayermolo commented Oct 5, 2023

dwblaikie commented Oct 11, 2023

dwblaikie commented Oct 11, 2023

dwblaikie commented Oct 16, 2023

ayermolo commented Oct 16, 2023

dwblaikie commented Oct 16, 2023

dwblaikie commented Oct 16, 2023

ayermolo commented Oct 16, 2023

dwblaikie commented Oct 16, 2023

clayborg commented Oct 16, 2023

ayermolo commented Oct 17, 2023

MaskRay Nov 6, 2023 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

ayermolo commented Oct 3, 2023 •

edited

Loading

github-actions bot commented Oct 5, 2023 •

edited

Loading

ayermolo commented Oct 5, 2023 •

edited

Loading

ayermolo commented Oct 5, 2023 •

edited

Loading

MaskRay Nov 6, 2023 •

edited

Loading