From 8e637b2cfca14a0c2b57282708753b8ef1590303 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Mon, 3 Nov 2014 20:10:27 -0500 Subject: [PATCH 1/7] Output labels when disassembling code --- src/disasm.cpp | 225 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 173 insertions(+), 52 deletions(-) diff --git a/src/disasm.cpp b/src/disasm.cpp index 604aef2ce7b9b..736e3bbdef75a 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -17,8 +17,16 @@ // //===----------------------------------------------------------------------===// -#include +#include #include +#include +#include +#include +#include + +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCSymbol.h" using namespace llvm; @@ -42,6 +50,70 @@ class FuncMCView : public MemoryObject { return 0; } }; + +// Look up a symbol, and return a const char* to its name when the +// address matches. We currently just use "L
" as name for the +// symbol. We could easily get more fancy, e.g. numbering symbols +// sequentially or encoding the line number, but that doesn't seem +// necessary. +class SymbolTable { + typedef std::map TableType; + TableType Table; + std::string TempName; + int Pass; +public: + void setPass(int Pass) { this->Pass = Pass; } + int getPass() const { return Pass; } + void insertAddress(uint64_t addr); + void createSymbols(MCContext &Ctx); + const char *lookupSymbol(uint64_t addr); +}; +// Insert an address +void SymbolTable::insertAddress(uint64_t addr) +{ + Table[addr] = NULL; +} +// Create symbols for all addresses +void SymbolTable::createSymbols(MCContext &Ctx) +{ + for (TableType::iterator isymb = Table.begin(), esymb = Table.end(); + isymb != esymb; ++isymb) { + uint64_t addr = isymb->first; + std::string name = std::string("L") + std::to_string(addr); + MCSymbol *symb = Ctx.GetOrCreateSymbol(StringRef(name)); + symb->setVariableValue(MCConstantExpr::Create(addr, Ctx)); + isymb->second = symb; + } +} +const char *SymbolTable::lookupSymbol(uint64_t addr) +{ + std::cout << "lookupSymbol(" << addr << ")\n"; + if (!Table.count(addr)) return NULL; + MCSymbol *symb = Table[addr]; + TempName = symb->getName().str(); + std::cout << " found " << TempName << "\n"; + return TempName.c_str(); +} +const char *SymbolLookup(void *DisInfo_, + uint64_t ReferenceValue, + uint64_t *ReferenceType, + uint64_t ReferencePC, + const char **ReferenceName) +{ + SymbolTable *DisInfo = (SymbolTable*)DisInfo_; + if (DisInfo->getPass() != 0) { + if (*ReferenceType == LLVMDisassembler_ReferenceType_In_Branch) { + uint64_t addr = ReferenceValue; + const char *symbolName = DisInfo->lookupSymbol(addr); + *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + *ReferenceName = NULL; + return symbolName; + } + } + *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + *ReferenceName = NULL; + return NULL; +} } #ifndef USE_MCJIT @@ -118,12 +190,13 @@ void jl_dump_function_asm(void *Fptr, size_t Fsize, #else OwningPtr STI(TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString())); - OwningPtr DisAsm(TheTarget->createMCDisassembler(*STI)); + OwningPtr DisAsm(TheTarget->createMCDisassembler(*STI)); #endif if (!DisAsm) { JL_PRINTF(JL_STDERR, "error: no disassembler for target", TripleName.c_str(), "\n"); return; } + SymbolTable DisInfo; unsigned OutputAsmVariant = 1; bool ShowEncoding = false; @@ -133,6 +206,8 @@ void jl_dump_function_asm(void *Fptr, size_t Fsize, std::unique_ptr MCII(TheTarget->createMCInstrInfo()); #else OwningPtr MCII(TheTarget->createMCInstrInfo()); + OwningPtr + MCIA(TheTarget->createMCInstrAnalysis(MCII.get())); #endif MCInstPrinter* IP = TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI, *MCII, *MRI, *STI); @@ -165,63 +240,109 @@ void jl_dump_function_asm(void *Fptr, size_t Fsize, // Make the MemoryObject wrapper FuncMCView memoryObject(Fptr, Fsize); - uint64_t Size = 0; - uint64_t Index = 0; - uint64_t absAddr = 0; + // Take two passes: In the first pass we record all branch labels, + // in the second we actually perform the output + for (int pass = 0; pass < 2; ++ pass) { + + DisInfo.setPass(pass); + if (pass != 0) { + // Switch to symbolic disassembly. We cannot do this + // before the first pass, because this changes branch + // targets from immediate values (constants) to + // expressions, which are not handled correctly by + // MCIA->evaluateBranch. (It should be possible to rewrite + // this routine to handle this case correctly as well.) + // Could add OpInfoLookup here + DisAsm->setupForSymbolicDisassembly + (NULL, SymbolLookup, &DisInfo, &Ctx); + } - // Set up the line info - typedef std::vector LInfoVec; - LInfoVec::iterator lineIter = lineinfo.begin(); - LInfoVec::iterator lineEnd = lineinfo.end(); + uint64_t Size = 0; + uint64_t Index = 0; + uint64_t absAddr = 0; - uint64_t nextLineAddr = -1; - DISubprogram debugscope; + // Set up the line info + typedef std::vector + LInfoVec; + LInfoVec::iterator lineIter = lineinfo.begin(); + LInfoVec::iterator lineEnd = lineinfo.end(); - if (lineIter != lineEnd) { - nextLineAddr = (*lineIter).Address; - debugscope = DISubprogram((*lineIter).Loc.getScope(jl_LLVMContext)); + uint64_t nextLineAddr = -1; + DISubprogram debugscope; - stream << "Filename: " << debugscope.getFilename() << "\n"; - stream << "Source line: " << (*lineIter).Loc.getLine() << "\n"; - } + if (lineIter != lineEnd) { + nextLineAddr = (*lineIter).Address; + debugscope = DISubprogram((*lineIter).Loc.getScope(jl_LLVMContext)); + + if (pass != 0) { + stream << "Filename: " << debugscope.getFilename() << "\n"; + stream << "Source line: " << (*lineIter).Loc.getLine() << "\n"; + } + } - // Do the disassembly - for (Index = 0, absAddr = (uint64_t)Fptr; - Index < memoryObject.getExtent(); Index += Size, absAddr += Size) { - - if (nextLineAddr != (uint64_t)-1 && absAddr == nextLineAddr) { - stream << "Source line: " << (*lineIter).Loc.getLine() << "\n"; - nextLineAddr = (*++lineIter).Address; + // Do the disassembly + for (Index = 0, absAddr = (uint64_t)Fptr; + Index < memoryObject.getExtent(); Index += Size, absAddr += Size) { + + if (nextLineAddr != (uint64_t)-1 && absAddr == nextLineAddr) { + if (pass != 0) + stream << "Source line: " + << (*lineIter).Loc.getLine() << "\n"; + nextLineAddr = (*++lineIter).Address; + } + if (pass != 0) { + // Uncomment this to output addresses for all instructions + // stream << Index << ": "; + const char *symbolName = DisInfo.lookupSymbol(Index); + if (symbolName) + stream << symbolName << ":"; + } + + MCInst Inst; + + MCDisassembler::DecodeStatus S; + S = DisAsm->getInstruction(Inst, Size, memoryObject, Index, + /*REMOVE*/ nulls(), nulls()); + switch (S) { + case MCDisassembler::Fail: + if (pass != 0) + SrcMgr.PrintMessage(SMLoc::getFromPointer(memoryObject[Index]), + SourceMgr::DK_Warning, + "invalid instruction encoding"); + if (Size == 0) + Size = 1; // skip illegible bytes + break; + + case MCDisassembler::SoftFail: + if (pass != 0) + SrcMgr.PrintMessage(SMLoc::getFromPointer(memoryObject[Index]), + SourceMgr::DK_Warning, + "potentially undefined instruction encoding"); + // Fall through + + case MCDisassembler::Success: + #ifdef LLVM35 + if (pass != 0) + Streamer->EmitInstruction(Inst, *STI); + #else + if (pass == 0) { + // Pass 0: Record all branch targets + if (MCIA->isBranch(Inst)) { + uint64_t addr = MCIA->evaluateBranch(Inst, Index, Size); + if (addr != uint64_t(-1)) + DisInfo.insertAddress(addr); + } + } else { + // Pass 1: Output instruction + Streamer->EmitInstruction(Inst); + } + #endif + break; + } } - MCInst Inst; - - MCDisassembler::DecodeStatus S; - S = DisAsm->getInstruction(Inst, Size, memoryObject, Index, - /*REMOVE*/ nulls(), nulls()); - switch (S) { - case MCDisassembler::Fail: - SrcMgr.PrintMessage(SMLoc::getFromPointer(memoryObject[Index]), - SourceMgr::DK_Warning, - "invalid instruction encoding"); - if (Size == 0) - Size = 1; // skip illegible bytes - break; - - case MCDisassembler::SoftFail: - SrcMgr.PrintMessage(SMLoc::getFromPointer(memoryObject[Index]), - SourceMgr::DK_Warning, - "potentially undefined instruction encoding"); - // Fall through - - case MCDisassembler::Success: - #ifdef LLVM35 - Streamer->EmitInstruction(Inst, *STI); - #else - Streamer->EmitInstruction(Inst); - #endif - break; - } + if (pass == 0) + DisInfo.createSymbols(Ctx); } #else // MCJIT version FuncMCView memoryObject(Fptr, Fsize); // MemoryObject wrapper From 335b9c895eb3ffe4f34139ed7797297d5e1434f9 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Mon, 3 Nov 2014 21:27:52 -0500 Subject: [PATCH 2/7] Avoid C++11 features --- src/disasm.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/disasm.cpp b/src/disasm.cpp index 736e3bbdef75a..4f58bad8ee931 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -21,8 +21,8 @@ #include #include #include +#include #include -#include #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstrAnalysis.h" @@ -79,8 +79,9 @@ void SymbolTable::createSymbols(MCContext &Ctx) for (TableType::iterator isymb = Table.begin(), esymb = Table.end(); isymb != esymb; ++isymb) { uint64_t addr = isymb->first; - std::string name = std::string("L") + std::to_string(addr); - MCSymbol *symb = Ctx.GetOrCreateSymbol(StringRef(name)); + std::ostringstream name; + name << "L" << addr; + MCSymbol *symb = Ctx.GetOrCreateSymbol(StringRef(name.str())); symb->setVariableValue(MCConstantExpr::Create(addr, Ctx)); isymb->second = symb; } From a44bf7b2f5961667f9cc8b6a9df5b56d685802fc Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Tue, 4 Nov 2014 19:14:54 -0500 Subject: [PATCH 3/7] Remove leftover debug output --- src/disasm.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/disasm.cpp b/src/disasm.cpp index 4f58bad8ee931..172572e553fd1 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -88,11 +88,9 @@ void SymbolTable::createSymbols(MCContext &Ctx) } const char *SymbolTable::lookupSymbol(uint64_t addr) { - std::cout << "lookupSymbol(" << addr << ")\n"; if (!Table.count(addr)) return NULL; MCSymbol *symb = Table[addr]; TempName = symb->getName().str(); - std::cout << " found " << TempName << "\n"; return TempName.c_str(); } const char *SymbolLookup(void *DisInfo_, From b5f8d379ae084cce4ab8091b3d3088bb17183a2b Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 5 Nov 2014 19:21:54 -0500 Subject: [PATCH 4/7] Output labels for call statements as well The labels are still output with the wrong offset (as if they had the value 0), and debug info is enabled. --- src/disasm.cpp | 71 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 9 deletions(-) diff --git a/src/disasm.cpp b/src/disasm.cpp index 172572e553fd1..439f63b14c92d 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -18,6 +18,7 @@ //===----------------------------------------------------------------------===// #include +#include #include #include #include @@ -60,12 +61,18 @@ class SymbolTable { typedef std::map TableType; TableType Table; std::string TempName; + MCContext& Ctx; + const FuncMCView &MemObj; int Pass; public: + SymbolTable(MCContext &Ctx, const FuncMCView &MemObj): + Ctx(Ctx), MemObj(MemObj) {} + const FuncMCView &getMemoryObject() const { return MemObj; } void setPass(int Pass) { this->Pass = Pass; } int getPass() const { return Pass; } void insertAddress(uint64_t addr); - void createSymbols(MCContext &Ctx); + void createSymbol(const char *name, uint64_t addr); + void createSymbols(); const char *lookupSymbol(uint64_t addr); }; // Insert an address @@ -73,8 +80,14 @@ void SymbolTable::insertAddress(uint64_t addr) { Table[addr] = NULL; } +// Create a symbol +void SymbolTable::createSymbol(const char *name, uint64_t addr) +{ + MCSymbol *symb = Ctx.GetOrCreateSymbol(StringRef(name)); + // symb->setVariableValue(MCConstantExpr::Create(addr, Ctx)); +} // Create symbols for all addresses -void SymbolTable::createSymbols(MCContext &Ctx) +void SymbolTable::createSymbols() { for (TableType::iterator isymb = Table.begin(), esymb = Table.end(); isymb != esymb; ++isymb) { @@ -93,17 +106,18 @@ const char *SymbolTable::lookupSymbol(uint64_t addr) TempName = symb->getName().str(); return TempName.c_str(); } -const char *SymbolLookup(void *DisInfo_, + +const char *SymbolLookup(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName) { - SymbolTable *DisInfo = (SymbolTable*)DisInfo_; - if (DisInfo->getPass() != 0) { + SymbolTable *SymTab = (SymbolTable*)DisInfo; + if (SymTab->getPass() != 0) { if (*ReferenceType == LLVMDisassembler_ReferenceType_In_Branch) { uint64_t addr = ReferenceValue; - const char *symbolName = DisInfo->lookupSymbol(addr); + const char *symbolName = SymTab->lookupSymbol(addr); *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; *ReferenceName = NULL; return symbolName; @@ -113,6 +127,45 @@ const char *SymbolLookup(void *DisInfo_, *ReferenceName = NULL; return NULL; } + +extern "C" void jl_getFunctionInfo + (const char **name, size_t *line, const char **filename, uintptr_t pointer, + int *fromC, int skipC); +int OpInfoLookup(void *DisInfo, uint64_t PC, + uint64_t Offset, uint64_t Size, + int TagType, void *TagBuf) +{ + SymbolTable *SymTab = (SymbolTable*)DisInfo; + std::cout << "OpInfoLookup PC="< sizeof pointer) + return 0; // Input address size too large + for (int i=0; igetMemoryObject().readByte(PC+Offset+i, &byte); + std::memcpy((char*)&pointer+i, &byte, 1); + } + std::cout << " pointer=" << pointer << "\n"; + int skipC = 0; + const char *name; + size_t line; + const char *filename; + int fromC; + jl_getFunctionInfo(&name, &line, &filename, pointer, &fromC, skipC); + if (!name) + return 0; // Did not find symbolic information + std::cout << "getFunctionInfo name="<createSymbol(name, pointer); + // Describe the symbol + info->Present = 1; + info->Name = name; + info->Value = 0; // offset + return 1; // Success +} } #ifndef USE_MCJIT @@ -195,7 +248,6 @@ void jl_dump_function_asm(void *Fptr, size_t Fsize, JL_PRINTF(JL_STDERR, "error: no disassembler for target", TripleName.c_str(), "\n"); return; } - SymbolTable DisInfo; unsigned OutputAsmVariant = 1; bool ShowEncoding = false; @@ -238,6 +290,7 @@ void jl_dump_function_asm(void *Fptr, size_t Fsize, // Make the MemoryObject wrapper FuncMCView memoryObject(Fptr, Fsize); + SymbolTable DisInfo(Ctx, memoryObject); // Take two passes: In the first pass we record all branch labels, // in the second we actually perform the output @@ -253,7 +306,7 @@ void jl_dump_function_asm(void *Fptr, size_t Fsize, // this routine to handle this case correctly as well.) // Could add OpInfoLookup here DisAsm->setupForSymbolicDisassembly - (NULL, SymbolLookup, &DisInfo, &Ctx); + (OpInfoLookup, SymbolLookup, &DisInfo, &Ctx); } uint64_t Size = 0; @@ -341,7 +394,7 @@ void jl_dump_function_asm(void *Fptr, size_t Fsize, } if (pass == 0) - DisInfo.createSymbols(Ctx); + DisInfo.createSymbols(); } #else // MCJIT version FuncMCView memoryObject(Fptr, Fsize); // MemoryObject wrapper From 29604d032384b9e318ffdbf6abd1a02e77bee2db Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 5 Nov 2014 19:44:15 -0500 Subject: [PATCH 5/7] Correct type error in symbol resolution --- src/disasm.cpp | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/disasm.cpp b/src/disasm.cpp index 439f63b14c92d..fdb9909aaf818 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -71,7 +71,7 @@ class SymbolTable { void setPass(int Pass) { this->Pass = Pass; } int getPass() const { return Pass; } void insertAddress(uint64_t addr); - void createSymbol(const char *name, uint64_t addr); + // void createSymbol(const char *name, uint64_t addr); void createSymbols(); const char *lookupSymbol(uint64_t addr); }; @@ -81,11 +81,11 @@ void SymbolTable::insertAddress(uint64_t addr) Table[addr] = NULL; } // Create a symbol -void SymbolTable::createSymbol(const char *name, uint64_t addr) -{ - MCSymbol *symb = Ctx.GetOrCreateSymbol(StringRef(name)); - // symb->setVariableValue(MCConstantExpr::Create(addr, Ctx)); -} +// void SymbolTable::createSymbol(const char *name, uint64_t addr) +// { +// MCSymbol *symb = Ctx.GetOrCreateSymbol(StringRef(name)); +// symb->setVariableValue(MCConstantExpr::Create(addr, Ctx)); +// } // Create symbols for all addresses void SymbolTable::createSymbols() { @@ -136,10 +136,9 @@ int OpInfoLookup(void *DisInfo, uint64_t PC, int TagType, void *TagBuf) { SymbolTable *SymTab = (SymbolTable*)DisInfo; - std::cout << "OpInfoLookup PC="< sizeof pointer) return 0; // Input address size too large @@ -148,7 +147,6 @@ int OpInfoLookup(void *DisInfo, uint64_t PC, SymTab->getMemoryObject().readByte(PC+Offset+i, &byte); std::memcpy((char*)&pointer+i, &byte, 1); } - std::cout << " pointer=" << pointer << "\n"; int skipC = 0; const char *name; size_t line; @@ -157,14 +155,12 @@ int OpInfoLookup(void *DisInfo, uint64_t PC, jl_getFunctionInfo(&name, &line, &filename, pointer, &fromC, skipC); if (!name) return 0; // Did not find symbolic information - std::cout << "getFunctionInfo name="<createSymbol(name, pointer); // Describe the symbol - info->Present = 1; - info->Name = name; - info->Value = 0; // offset - return 1; // Success + info->AddSymbol.Present = 1; + info->AddSymbol.Name = name; + info->AddSymbol.Value = pointer; // unused by LLVM + info->Value = 0; // offset + return 1; // Success } } From f342f5e43980bf256f26242af0a04f71bb39f00a Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 5 Nov 2014 21:09:56 -0500 Subject: [PATCH 6/7] Make offset-reading code endian safe --- src/disasm.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/disasm.cpp b/src/disasm.cpp index fdb9909aaf818..1e9789df6523f 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -139,13 +139,16 @@ int OpInfoLookup(void *DisInfo, uint64_t PC, if (TagType != 1) return 0; // Unknown data format LLVMOpInfo1 *info = (LLVMOpInfo1*)TagBuf; - size_t pointer = 0; - if (Size > sizeof pointer) - return 0; // Input address size too large + uint8_t bytes[Size]; for (int i=0; igetMemoryObject().readByte(PC+Offset+i, &byte); - std::memcpy((char*)&pointer+i, &byte, 1); + SymTab->getMemoryObject().readByte(PC+Offset+i, &bytes[i]); + size_t pointer; + switch (Size) { + case 1: { uint8_t val; std::memcpy(&val, bytes, 1); pointer = val; break; } + case 2: { uint16_t val; std::memcpy(&val, bytes, 2); pointer = val; break; } + case 4: { uint32_t val; std::memcpy(&val, bytes, 4); pointer = val; break; } + case 8: { uint64_t val; std::memcpy(&val, bytes, 8); pointer = val; break; } + default: return 0; // Cannot handle input address size } int skipC = 0; const char *name; From 86228e92d27e920696cf1cbf260048696ab3b1cd Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 5 Nov 2014 21:43:49 -0500 Subject: [PATCH 7/7] Correct syntax error --- src/disasm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/disasm.cpp b/src/disasm.cpp index 1e9789df6523f..2723296f0f794 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -140,7 +140,7 @@ int OpInfoLookup(void *DisInfo, uint64_t PC, return 0; // Unknown data format LLVMOpInfo1 *info = (LLVMOpInfo1*)TagBuf; uint8_t bytes[Size]; - for (int i=0; igetMemoryObject().readByte(PC+Offset+i, &bytes[i]); size_t pointer; switch (Size) {