-
Notifications
You must be signed in to change notification settings - Fork 15.7k
[llvm-profgen] Loading binary functions from .symtab when DWARF info is incomplete #163654
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
f34ab2d
a1f32b5
0fd352d
c097d37
a19064d
e12e694
5eead6b
8f59bfa
a967994
0dc2c66
75dc996
5600e83
0df504e
644fce9
9188eff
b6ae0ba
b1cbdd0
8927a27
108bc08
6e3a5ac
3d8ad53
1344cdd
2978e21
eaab9df
c53e0ae
b707e80
57ab1cd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| ; RUN: rm -rf %t | ||
| ; RUN: mkdir -p %t | ||
| ; RUN: cd %t | ||
|
|
||
| ; RUN: echo -e "1\n401120-40113b:1\n1\n40112f->401110:1" > %t.prof | ||
|
|
||
| ; Test --load-function-from-symbol=0 | ||
| ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t1 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=0 2>&1 | FileCheck %s --check-prefix=CHECK-NO-LOAD-SYMTAB | ||
|
|
||
| ; CHECK-NO-LOAD-SYMTAB: warning: 100.00%(1/1) of function range samples do not belong to any function | ||
| ; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR source samples do not belong to any function | ||
| ; CHECK-NO-LOAD-SYMTAB-NEXT: warning: 100.00%(1/1) of LBR target samples do not belong to any function | ||
|
|
||
| ; Test --load-function-from-symbol=1 | ||
| ; RUN: llvm-profgen --format=text --unsymbolized-profile=%t.prof --binary=%S/Inputs/missing-dwarf.exe --output=%t2 --fill-zero-for-all-funcs --show-detailed-warning --use-offset=0 --load-function-from-symbol=1 | ||
| ; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-LOAD-SYMTAB | ||
|
|
||
| ; CHECK-LOAD-SYMTAB: main:2:1 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: 1: 1 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: 2: 1 foo:1 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 281479271677951 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: foo:0:0 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: 1: 0 | ||
| ; CHECK-LOAD-SYMTAB-NEXT: !CFGChecksum: 4294967295 | ||
|
|
||
| ; Build instructions: | ||
| ; missing-dwarf.o: clang -gsplit-dwarf=split -fdebug-compilation-dir=. test.c -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g -o missing-dwarf.o -c | ||
| ; missing-dwarf.exe: clang -fdebug-compilation-dir=. missing-dwarf.o -o missing-dwarf.exe -fdebug-info-for-profiling -fpseudo-probe-for-profiling -O0 -g | ||
|
|
||
| ; Source code: | ||
|
|
||
| int foo() { | ||
| return 1; | ||
| } | ||
|
|
||
| int main() { | ||
| foo(); | ||
| return 0; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -449,29 +449,62 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile( | |
| // Go through all the stacks, ranges and branches in sample counters, use | ||
| // the start of the range to look up the function it belongs and record the | ||
| // function. | ||
| uint64_t ErrStkAddr = 0, ErrFuncRange = 0, ErrSrc = 0, ErrTgt = 0; | ||
| uint64_t TotalStkAddr = 0, TotalFuncRange = 0, TotalSrc = 0, TotalTgt = 0; | ||
| for (const auto &CI : *SampleCounters) { | ||
| if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) { | ||
| for (auto StackAddr : CtxKey->Context) { | ||
| uint64_t Inc = Binary->addressIsCode(StackAddr) ? 1 : 0; | ||
| TotalStkAddr += Inc; | ||
| if (FuncRange *FRange = Binary->findFuncRange(StackAddr)) | ||
| ProfiledFunctions.insert(FRange->Func); | ||
| else | ||
| ErrStkAddr += Inc; | ||
| } | ||
| } | ||
|
|
||
| for (auto Item : CI.second.RangeCounter) { | ||
| uint64_t StartAddress = Item.first.first; | ||
| uint64_t Inc = Binary->addressIsCode(StartAddress) ? Item.second : 0; | ||
| TotalFuncRange += Inc; | ||
| if (FuncRange *FRange = Binary->findFuncRange(StartAddress)) | ||
| ProfiledFunctions.insert(FRange->Func); | ||
| else | ||
| ErrFuncRange += Inc; | ||
| } | ||
|
|
||
| for (auto Item : CI.second.BranchCounter) { | ||
| uint64_t SourceAddress = Item.first.first; | ||
| uint64_t TargetAddress = Item.first.second; | ||
| uint64_t SrcInc = Binary->addressIsCode(SourceAddress) ? Item.second : 0; | ||
| uint64_t TgtInc = Binary->addressIsCode(TargetAddress) ? Item.second : 0; | ||
| TotalSrc += SrcInc; | ||
| if (FuncRange *FRange = Binary->findFuncRange(SourceAddress)) | ||
| ProfiledFunctions.insert(FRange->Func); | ||
| else | ||
| ErrSrc += SrcInc; | ||
| TotalTgt += TgtInc; | ||
| if (FuncRange *FRange = Binary->findFuncRange(TargetAddress)) | ||
| ProfiledFunctions.insert(FRange->Func); | ||
| else | ||
| ErrTgt += TgtInc; | ||
| } | ||
| } | ||
|
|
||
| if (ErrStkAddr) | ||
|
||
| emitWarningSummary( | ||
| ErrStkAddr, TotalStkAddr, | ||
| "of stack address samples do not belong to any function"); | ||
| if (ErrFuncRange) | ||
| emitWarningSummary( | ||
| ErrFuncRange, TotalFuncRange, | ||
| "of function range samples do not belong to any function"); | ||
|
||
| if (ErrSrc) | ||
| emitWarningSummary(ErrSrc, TotalSrc, | ||
| "of LBR source samples do not belong to any function"); | ||
| if (ErrTgt) | ||
| emitWarningSummary(ErrTgt, TotalTgt, | ||
| "of LBR target samples do not belong to any function"); | ||
|
||
| return true; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -65,6 +65,13 @@ static cl::list<std::string> DisassembleFunctions( | |||
| "names only. Only work with show-disassembly-only"), | ||||
| cl::cat(ProfGenCategory)); | ||||
|
|
||||
| static cl::opt<bool> | ||||
| LoadFunctionFromSymbol("load-function-from-symbol", cl::init(true), | ||||
|
||||
| cl::desc("Gather additional binary function info " | ||||
| "from symbols (e.g. .symtab) in case " | ||||
| "dwarf info is incomplete."), | ||||
| cl::cat(ProfGenCategory)); | ||||
|
|
||||
| static cl::opt<bool> | ||||
| KernelBinary("kernel", | ||||
| cl::desc("Generate the profile for Linux kernel binary."), | ||||
|
|
@@ -257,6 +264,9 @@ void ProfiledBinary::load() { | |||
| if (ShowDisassemblyOnly) | ||||
| decodePseudoProbe(Obj); | ||||
|
|
||||
| if (LoadFunctionFromSymbol && UsePseudoProbes) | ||||
apolloww marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
| populateSymbolsFromBinary(Obj); | ||||
|
|
||||
| // Disassemble the text sections. | ||||
| disassemble(Obj); | ||||
|
|
||||
|
|
@@ -604,13 +614,13 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes, | |||
| // Record potential call targets for tail frame inference later-on. | ||||
| if (InferMissingFrames && FRange) { | ||||
| uint64_t Target = 0; | ||||
| MIA->evaluateBranch(Inst, Address, Size, Target); | ||||
| bool Err = MIA->evaluateBranch(Inst, Address, Size, Target); | ||||
| if (MCDesc.isCall()) { | ||||
| // Indirect call targets are unknown at this point. Recording the | ||||
| // unknown target (zero) for further LBR-based refinement. | ||||
| MissingContextInferrer->CallEdges[Address].insert(Target); | ||||
| } else if (MCDesc.isUnconditionalBranch()) { | ||||
| assert(Target && | ||||
| assert(Err && | ||||
apolloww marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
| "target should be known for unconditional direct branch"); | ||||
| // Any inter-function unconditional jump is considered tail call at | ||||
| // this point. This is not 100% accurate and could further be | ||||
|
|
@@ -820,6 +830,54 @@ void ProfiledBinary::populateSymbolAddressList(const ObjectFile *Obj) { | |||
| } | ||||
| } | ||||
|
|
||||
| void ProfiledBinary::populateSymbolsFromBinary(const ObjectFile *Obj) { | ||||
|
||||
| // Load binary functions from symbol table when Debug info is incomplete | ||||
| const SmallVector<StringRef> Suffixes( | ||||
|
||||
| {".destroy", ".resume", ".llvm.", ".cold", ".warm"}); | ||||
| StringRef FileName = Obj->getFileName(); | ||||
| for (const SymbolRef &Symbol : Obj->symbols()) { | ||||
| const SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName); | ||||
| const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); | ||||
|
||||
| const StringRef Name = unwrapOrError(Symbol.getName(), FileName); | ||||
| uint64_t Size = 0; | ||||
| if (isa<ELFObjectFileBase>(Symbol.getObject())) { | ||||
|
||||
| ELFSymbolRef ElfSymbol(Symbol); | ||||
| Size = ElfSymbol.getSize(); | ||||
| } | ||||
|
|
||||
| if (Size == 0 || Type != SymbolRef::ST_Function) | ||||
| continue; | ||||
|
|
||||
| const StringRef SymName = | ||||
| FunctionSamples::getCanonicalFnName(Name, Suffixes); | ||||
|
|
||||
| auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction()); | ||||
| auto &Func = Ret.first->second; | ||||
| if (Ret.second) { | ||||
| Func.FuncName = Ret.first->first; | ||||
| HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func; | ||||
| } | ||||
|
|
||||
| if (auto Range = findFuncRange(Addr)) { | ||||
| if (Ret.second && ShowDetailedWarning) | ||||
| WithColor::warning() | ||||
|
||||
| auto Ret = BinaryFunctions.emplace(SymName, BinaryFunction()); |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Symbol table finding entry already existing in dwarf should not be a warning.
When that happens, we should check if they both point to same ranges, and only warn when
- symbol table and dwarf conflicts for the actual range.
- something is in symbol table, but not in dwarf (suggesting dwarf corrupted, e.g. due to relo overflow)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated this part and the warning shall now capture case 1. And I've merged (2) into the PerfScriptReader::warnInvalidRange :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. Can you test on a big binary to make sure this warning does not fire?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I actually didn't see this warning firing at all in my test, but it shall capture the naming conflicts between dwarf and symbol table, like... in case I missed to strip some internal suffixes...
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should only be storing additional range, not "updating" existing range, right?
Also suggest to structure the code like below for clarify:
auto &Func = Ret.first->second;
if (Ret.second) {
// Function from symbol table not found previously in DWARF, store ranges.
Func.FuncName = Ret.first->first;
Func.FromSymtab = true;
HashBinaryFunctions[MD5Hash(StringRef(SymName))] = &Func;
...
}
else {
// Function already found from DWARF, check consistency between symbol table and DWARF.
...
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, right.... "updating" is actually not a correct behavior. let me flag that in a warning too :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
actually... we probably still need to "update" the range.. because like, coroutine creates a few functions: "foo", "foo.destroy", "foo.resume", "foo.cleanup", and each of them has a different address range in the symbol table. Even Ret.second == false could still be some function added from the symbol table...
Maybe the StartAddr works better to check if a function is already found in DWARF or not? I'm changing the logic to something like this:
auto Range = findFuncRange(StartAddr);
if (!Range || Range->StartAddress != StartAddr) {
// Function from symbol table not found previously in DWARF, store ranges.
...
} else if (SymName != Range->getFuncName() && ShowDetailedWarning) {
// Function already found from DWARF, check consistency between symbol table and DWARF.
WithColor::warning()...
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit for readability:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also a question, do we expect StackAddrIsCode == false, but findFuncRange has a valid function range?