-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[llvm-profgen] Loading binary functions from .symtab when DWARF info is incomplete #163654
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f34ab2d
a1f32b5
0fd352d
c097d37
a19064d
e12e694
5eead6b
8f59bfa
a967994
0dc2c66
75dc996
5600e83
0df504e
644fce9
9188eff
b6ae0ba
b1cbdd0
8927a27
108bc08
6e3a5ac
3d8ad53
1344cdd
2978e21
eaab9df
c53e0ae
b707e80
57ab1cd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| ; RUN: rm -rf %t | ||
| ; RUN: mkdir -p %t | ||
| ; RUN: cd %t | ||
|
|
||
| ; RUN: echo -e "1\n401120-40113b:1\n1\n40112f->401110:1" > %t.prof | ||
|
|
||
| ; Test --load-function-from-symbol=0 | ||
| ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB | ||
|
|
||
| ; CHECK-NO-LOAD-SYMTAB: warning: Loading of DWARF info completed, but no binary functions have been retrieved. | ||
|
|
||
| ; Test --load-function-from-symbol=1 | ||
| ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1 | ||
| ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-LOAD-SYMTAB | ||
|
|
||
| ; CHECK-LOAD-SYMTAB: main:2:1 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: 1: 1 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: 2: 1 foo:1 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 281479271677951 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: foo:0:0 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: 1: 0 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 4294967295 | ||
|
|
||
| ; Build instructions: | ||
| ; missing-dwarf.o: clang -gsplit-dwarf=split -fdebug-compilation-dir=. test.c -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g -o missing-dwarf.o -c | ||
| ; missing-dwarf.exe: clang -fdebug-compilation-dir=. missing-dwarf.o -o missing-dwarf.exe -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g | ||
|
|
||
| ; Source code: | ||
|
|
||
| int foo() { | ||
| return 1; | ||
| } | ||
|
|
||
| int main() { | ||
| foo(); | ||
| return 0; | ||
| } |
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -37,6 +37,14 @@ cl::opt<bool> ShowSourceLocations("show-source-locations", | |||
| cl::desc("Print source locations."), | ||||
| cl::cat(ProfGenCategory)); | ||||
|
|
||||
| cl::opt<bool> LoadFunctionFromSymbol( | ||||
| "load-function-from-symbol", cl::init(true), | ||||
| cl::desc( | ||||
| "Gather additional binary function info from symbols (e.g. .symtab) in " | ||||
| "case dwarf info is incomplete. Only support binaries in ELF format " | ||||
| "with pseudo probe, for other formats, this flag will be a no-op."), | ||||
| cl::cat(ProfGenCategory)); | ||||
|
|
||||
| static cl::opt<bool> | ||||
| ShowCanonicalFnName("show-canonical-fname", | ||||
| cl::desc("Print canonical function name."), | ||||
|
|
@@ -257,6 +265,9 @@ void ProfiledBinary::load() { | |||
| if (ShowDisassemblyOnly) | ||||
| decodePseudoProbe(Obj); | ||||
|
|
||||
| if (LoadFunctionFromSymbol && UsePseudoProbes) | ||||
| loadSymbolsFromSymtab(Obj); | ||||
|
|
||||
| // Disassemble the text sections. | ||||
| disassemble(Obj); | ||||
|
|
||||
|
|
@@ -461,6 +472,13 @@ void ProfiledBinary::decodePseudoProbe(const ObjectFile *Obj) { | |||
| } else { | ||||
| for (auto *F : ProfiledFunctions) { | ||||
| GuidFilter.insert(Function::getGUIDAssumingExternalLinkage(F->FuncName)); | ||||
| // DWARF name might be broken when a DWARF32 .debug_str.dwo section | ||||
| // execeeds 4GB. We expect symbol table to contain the correct function | ||||
| // names which matches the pseudo probe. Adding back all the GUIDs if | ||||
| // possible. | ||||
| auto AltGUIDs = AlternativeFunctionGUIDs.equal_range(F); | ||||
| for (const auto &[_, Func] : make_range(AltGUIDs)) | ||||
| GuidFilter.insert(Func); | ||||
| for (auto &Range : F->Ranges) { | ||||
| auto GUIDs = StartAddrToSymMap.equal_range(Range.first); | ||||
| for (const auto &[StartAddr, Func] : make_range(GUIDs)) | ||||
|
|
@@ -522,7 +540,9 @@ void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange, | |||
| // Set IsFuncEntry to ture if there is only one range in the function or the | ||||
| // RangeSymName from ELF is equal to its DWARF-based function name. | ||||
| if (FuncRange->Func->Ranges.size() == 1 || | ||||
| (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName)) | ||||
| (!FuncRange->IsFuncEntry && | ||||
| (FuncRange->getFuncName() == RangeSymName || | ||||
| FuncRange->Func->NameStatus != DwarfNameStatus::Matched))) | ||||
apolloww marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
| FuncRange->IsFuncEntry = true; | ||||
| } | ||||
|
|
||||
|
|
@@ -604,13 +624,13 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes, | |||
| // Record potential call targets for tail frame inference later-on. | ||||
| if (InferMissingFrames && FRange) { | ||||
| uint64_t Target = 0; | ||||
| MIA->evaluateBranch(Inst, Address, Size, Target); | ||||
| bool Err = MIA->evaluateBranch(Inst, Address, Size, Target); | ||||
| if (MCDesc.isCall()) { | ||||
| // Indirect call targets are unknown at this point. Recording the | ||||
| // unknown target (zero) for further LBR-based refinement. | ||||
| MissingContextInferrer->CallEdges[Address].insert(Target); | ||||
| } else if (MCDesc.isUnconditionalBranch()) { | ||||
| assert(Target && | ||||
| assert(Err && | ||||
apolloww marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
| "target should be known for unconditional direct branch"); | ||||
| // Any inter-function unconditional jump is considered tail call at | ||||
| // this point. This is not 100% accurate and could further be | ||||
|
|
@@ -820,6 +840,100 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) { | |||
| } | ||||
| } | ||||
|
|
||||
| void ProfiledBinary::loadSymbolsFromSymtab(const ObjectFile *Obj) { | ||||
| // Load binary functions from symbol table when Debug info is incomplete. | ||||
| // Strip the internal suffixes which are not reflected in the DWARF info. | ||||
| const SmallVector<StringRef, 10> Suffixes( | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shall we update the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the feedback! I was only worried if changing the |
||||
| {// Internal suffixes from CoroSplit pass | ||||
| ".cleanup", ".destroy", ".resume", | ||||
| // Internal suffixes from Bolt | ||||
| ".cold", ".warm", | ||||
| // Compiler/LTO internal | ||||
| ".llvm.", ".part.", ".isra.", ".constprop.", ".lto_priv."}); | ||||
| StringRef FileName = Obj->getFileName(); | ||||
| // Only apply this to ELF binary. e.g. COFF file format doesn't have `size` | ||||
| // field in the symbol table. | ||||
| bool IsELFObject = isa<ELFObjectFileBase>(Obj); | ||||
| if (!IsELFObject) | ||||
| return; | ||||
| for (const SymbolRef &Symbol : Obj->symbols()) { | ||||
| const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName); | ||||
| const uint64_t StartAddr = unwrapOrError(Symbol.getAddress(), FileName); | ||||
| const StringRef Name = unwrapOrError(Symbol.getName(), FileName); | ||||
| uint64_t Size = 0; | ||||
| if (LLVM_LIKELY(IsELFObject)) { | ||||
| ELFSymbolRef ElfSymbol(Symbol); | ||||
| Size = ElfSymbol.getSize(); | ||||
| } | ||||
|
|
||||
| if (Size == 0 || Type != SymbolRef::ST_Function) | ||||
| continue; | ||||
|
|
||||
| const uint64_t EndAddr = StartAddr + Size; | ||||
| const StringRef SymName = | ||||
| FunctionSamples::getCanonicalFnName(Name, Suffixes); | ||||
| assert(StartAddr < EndAddr && StartAddr >= getPreferredBaseAddress() && | ||||
| "Function range is invalid."); | ||||
|
|
||||
| auto Range = findFuncRange(StartAddr); | ||||
| if (!Range) { | ||||
| assert(findFuncRange(EndAddr - 1) == nullptr && | ||||
| "Function range overlaps with existing functions."); | ||||
| // Function from symbol table not found previously in DWARF, store ranges. | ||||
| auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction()); | ||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If a range is found, we should expect function to be found. probably want to |
||||
| auto &Func = Ret.first->second; | ||||
| if (Ret.second) { | ||||
| Func.FuncName = Ret.first->first; | ||||
| HashBinaryFunctions[Function::getGUIDAssumingExternalLinkage(SymName)] = | ||||
| &Func; | ||||
| } | ||||
|
|
||||
| Func.NameStatus = DwarfNameStatus::Missing; | ||||
| Func.Ranges.emplace_back(StartAddr, EndAddr); | ||||
|
|
||||
| auto R = StartAddrToFuncRangeMap.emplace(StartAddr, FuncRange()); | ||||
| FuncRange &FRange = R.first->second; | ||||
|
|
||||
| FRange.Func = &Func; | ||||
| FRange.StartAddress = StartAddr; | ||||
| FRange.EndAddress = EndAddr; | ||||
|
|
||||
| } else if (SymName != Range->getFuncName()) { | ||||
| // Function range already found from DWARF, but the symbol name from | ||||
| // symbol table is inconsistent with debug info. Log this discrepancy and | ||||
| // the alternative function GUID. | ||||
| if (ShowDetailedWarning) | ||||
| WithColor::warning() | ||||
| << "Conflicting name for symbol " << Name << " with range (" | ||||
| << format("%8" PRIx64, StartAddr) << ", " | ||||
| << format("%8" PRIx64, EndAddr) << ")" | ||||
| << ", but the DWARF symbol " << Range->getFuncName() | ||||
| << " indicates an overlapping range (" | ||||
| << format("%8" PRIx64, Range->StartAddress) << ", " | ||||
| << format("%8" PRIx64, Range->EndAddress) << ")\n"; | ||||
|
|
||||
| assert(StartAddr == Range->StartAddress && EndAddr == Range->EndAddress && | ||||
| "Mismatched function range"); | ||||
|
|
||||
| Range->Func->NameStatus = DwarfNameStatus::Mismatch; | ||||
apolloww marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
| AlternativeFunctionGUIDs.emplace( | ||||
| Range->Func, Function::getGUIDAssumingExternalLinkage(SymName)); | ||||
|
|
||||
| } else if (StartAddr != Range->StartAddress && | ||||
apolloww marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
| EndAddr != Range->EndAddress) { | ||||
| // Function already found in DWARF, but the address range from symbol | ||||
| // table conflicts/overlaps with the debug info. | ||||
| WithColor::warning() << "Conflicting range for symbol " << Name | ||||
| << " with range (" << format("%8" PRIx64, StartAddr) | ||||
| << ", " << format("%8" PRIx64, EndAddr) << ")" | ||||
| << ", but the DWARF symbol " << Range->getFuncName() | ||||
| << " indicates another range (" | ||||
| << format("%8" PRIx64, Range->StartAddress) << ", " | ||||
| << format("%8" PRIx64, Range->EndAddress) << ")\n"; | ||||
| } | ||||
| } | ||||
| } | ||||
|
|
||||
| void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) { | ||||
| for (const auto &DieInfo : CompilationUnit.dies()) { | ||||
| llvm::DWARFDie Die(&CompilationUnit, &DieInfo); | ||||
|
|
@@ -1034,6 +1148,58 @@ void ProfiledBinary::computeInlinedContextSizeForFunc( | |||
| } | ||||
| } | ||||
|
|
||||
| void ProfiledBinary::loadSymbolsFromPseudoProbe() { | ||||
| if (!UsePseudoProbes) | ||||
| return; | ||||
|
|
||||
| const AddressProbesMap &Address2ProbesMap = getAddress2ProbesMap(); | ||||
| for (auto *Func : ProfiledFunctions) { | ||||
| if (Func->NameStatus != DwarfNameStatus::Mismatch) | ||||
| continue; | ||||
| for (auto &[StartAddr, EndAddr] : Func->Ranges) { | ||||
| auto Range = findFuncRangeForStartAddr(StartAddr); | ||||
| if (!Range->IsFuncEntry) | ||||
| continue; | ||||
| const auto &Probe = Address2ProbesMap.find(StartAddr, EndAddr); | ||||
| if (Probe.begin() != Probe.end()) { | ||||
| const MCDecodedPseudoProbeInlineTree *InlineTreeNode = | ||||
| Probe.begin()->get().getInlineTreeNode(); | ||||
| while (!InlineTreeNode->isTopLevelFunc()) | ||||
| InlineTreeNode = static_cast<MCDecodedPseudoProbeInlineTree *>( | ||||
| InlineTreeNode->Parent); | ||||
|
|
||||
| auto TopLevelProbes = InlineTreeNode->getProbes(); | ||||
| auto TopProbe = TopLevelProbes.begin(); | ||||
| assert(TopProbe != TopLevelProbes.end() && | ||||
| TopProbe->getAddress() >= StartAddr && | ||||
| TopProbe->getAddress() < EndAddr && | ||||
| "Top level pseudo probe does not match function range"); | ||||
|
|
||||
| const auto *ProbeDesc = getFuncDescForGUID(InlineTreeNode->Guid); | ||||
| auto Ret = PseudoProbeNames.emplace(Func, ProbeDesc->FuncName); | ||||
| if (!Ret.second && Ret.first->second != ProbeDesc->FuncName && | ||||
| ShowDetailedWarning) | ||||
| WithColor::warning() | ||||
| << "Mismatched pseudo probe names in function " << Func->FuncName | ||||
| << " at range: (" << format("%8" PRIx64, StartAddr) << ", " | ||||
| << format("%8" PRIx64, EndAddr) << "). " | ||||
| << "The previously found pseudo probe name is " | ||||
| << Ret.first->second << " but it conflicts with name " | ||||
| << ProbeDesc->FuncName | ||||
| << " This likely indicates a DWARF error that produces " | ||||
| "conflicting symbols at the same starting address.\n"; | ||||
| } | ||||
| } | ||||
| } | ||||
| } | ||||
|
|
||||
| StringRef ProfiledBinary::findPseudoProbeName(const BinaryFunction *Func) { | ||||
| auto ProbeName = PseudoProbeNames.find(Func); | ||||
| if (ProbeName == PseudoProbeNames.end()) | ||||
| return StringRef(); | ||||
| return ProbeName->second; | ||||
| } | ||||
|
Comment on lines
+1196
to
+1201
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm wondering if we really need an extra map. The symbol from pseudo probe should be the ground truth(otherwise it's not loaded by compiler), how about we just overwriting the function name field in
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmmm, the problem is
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, that makes sense, thanks for the clarification! |
||||
|
|
||||
| void ProfiledBinary::inferMissingFrames( | ||||
| const SmallVectorImpl<uint64_t> &Context, | ||||
| SmallVectorImpl<uint64_t> &NewContext) { | ||||
|
|
||||
Uh oh!
There was an error while loading. Please reload this page.