diff --git a/CMake/LLVM/LLVM.cmake b/CMake/LLVM/LLVM.cmake
index 457fc33542d..1b0a5ce6c17 100644
--- a/CMake/LLVM/LLVM.cmake
+++ b/CMake/LLVM/LLVM.cmake
@@ -62,4 +62,16 @@ endif()
 
 # Add the external llvm project to the build.
 add_subdirectory(${propeller_llvm_src_dir}/llvm ${propeller_llvm_build_dir}/llvm EXCLUDE_FROM_ALL)
-include_directories(${propeller_llvm_src_dir}/llvm/include ${propeller_llvm_build_dir}/llvm/include)
+include_directories(
+  ${propeller_llvm_src_dir}
+  ${propeller_llvm_src_dir}/llvm/include
+  ${propeller_llvm_build_dir}
+  ${propeller_llvm_build_dir}/llvm/include
+)
+# Add generated target-specific library directories.
+foreach (tgt ${LLVM_TARGETS_TO_BUILD})
+  include_directories(
+    ${propeller_llvm_src_dir}/llvm/lib/Target/${tgt}
+    ${propeller_llvm_build_dir}/llvm/lib/Target/${tgt}
+  )
+endforeach()
diff --git a/propeller/BUILD b/propeller/BUILD
index f34c22cc8a7..4e1507bf382 100644
--- a/propeller/BUILD
+++ b/propeller/BUILD
@@ -649,6 +649,29 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "mini_disassembler",
+    srcs = ["mini_disassembler.cc"],
+    hdrs = ["mini_disassembler.h"],
+    deps = [
+        "@abseil-cpp//absl/base:nullability",
+        "@abseil-cpp//absl/log",
+        "@abseil-cpp//absl/log:check",
+        "@abseil-cpp//absl/memory",
+        "@abseil-cpp//absl/status",
+        "@abseil-cpp//absl/status:statusor",
+        "@abseil-cpp//absl/strings",
+        "@abseil-cpp//absl/strings:str_format",
+        "@llvm-project//llvm:AllTargetsAsmParsers",  # buildcleaner: keep
+        "@llvm-project//llvm:AllTargetsDisassemblers",  # buildcleaner: keep
+        "@llvm-project//llvm:MC",
+        "@llvm-project//llvm:MCDisassembler",
+        "@llvm-project//llvm:Object",
+        "@llvm-project//llvm:Support",
+        "@llvm-project//llvm:TargetParser",
+    ],
+)
+
 ########################
 #  Tests & Test Utils  #
 ########################
@@ -1125,3 +1148,21 @@ cc_test(
         "@com_google_googletest//:gtest_main",
     ],
 )
+
+cc_test(
+    name = "mini_disassembler_test",
+    srcs = ["mini_disassembler_test.cc"],
+    data = [
+        "//propeller/testdata:llvm_function_samples.binary",
+    ],
+    deps = [
+        ":binary_content",
+        ":mini_disassembler",
+        ":status_testing_macros",
+        "@abseil-cpp//absl/status:status_matchers",
+        "@abseil-cpp//absl/strings",
+        "@com_google_googletest//:gtest_main",
+        "@llvm-project//llvm:MC",
+        "@llvm-project//llvm:X86UtilsAndDesc",
+    ],
+)
diff --git a/propeller/CMakeLists.txt b/propeller/CMakeLists.txt
index a5e508791bc..2952c07b84b 100644
--- a/propeller/CMakeLists.txt
+++ b/propeller/CMakeLists.txt
@@ -42,6 +42,7 @@ add_library(propeller_lib OBJECT
   file_perf_data_provider.cc
   frequencies_branch_aggregator.cc
   lbr_branch_aggregator.cc
+  mini_disassembler.cc
   node_chain.cc
   node_chain_assembly.cc
   node_chain_builder.cc
@@ -66,6 +67,11 @@ target_link_libraries(propeller_lib
   quipper_protos
   # keep-sorted end
 )
+foreach (tgt ${LLVM_TARGETS_TO_BUILD})
+  foreach (tool AsmParser Desc Disassembler Info)
+    target_link_libraries(propeller_lib LLVM${tgt}${tool})
+  endforeach()
+endforeach()
 
 # Build all CXX test utilities into a unified library.
 add_library(propeller_test_lib OBJECT
diff --git a/propeller/mini_disassembler.cc b/propeller/mini_disassembler.cc
new file mode 100644
index 00000000000..30e56d3b01b
--- /dev/null
+++ b/propeller/mini_disassembler.cc
@@ -0,0 +1,139 @@
+#include "propeller/mini_disassembler.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "absl/base/nullability.h"
+#include "absl/log/check.h"
+#include "absl/log/log.h"
+#include "absl/memory/memory.h"
+#include "absl/status/status.h"
+#include "absl/status/statusor.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/string_view.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
+
+namespace propeller {
+absl::StatusOr<absl::Nonnull<std::unique_ptr<MiniDisassembler>>>
+MiniDisassembler::Create(const llvm::object::ObjectFile *object_file) {
+  auto disassembler =
+      absl::WrapUnique(new MiniDisassembler(object_file));
+
+  std::string err;
+  llvm::InitializeAllTargetInfos();
+  llvm::InitializeAllTargetMCs();
+  llvm::InitializeAllAsmParsers();
+  llvm::InitializeAllDisassemblers();
+
+  llvm::Triple triple;
+  triple.setArch(
+      llvm::Triple::ArchType(object_file->getArch()));
+  const llvm::Target *target =
+      llvm::TargetRegistry::lookupTarget(triple.normalize(), err);
+  if (target == nullptr) {
+    return absl::FailedPreconditionError(absl::StrFormat(
+        "no target for triple '%s': %s", triple.getArchName(), err));
+  }
+  disassembler->mri_ =
+      absl::WrapUnique(target->createMCRegInfo(triple.getTriple()));
+  if (disassembler->mri_ == nullptr) {
+    return absl::FailedPreconditionError(absl::StrFormat(
+        "createMCRegInfo failed for triple '%s'", triple.getArchName()));
+  }
+  disassembler->asm_info_ = absl::WrapUnique(target->createMCAsmInfo(
+      *disassembler->mri_, triple.getTriple(), llvm::MCTargetOptions()));
+  if (disassembler->asm_info_ == nullptr) {
+    return absl::FailedPreconditionError(absl::StrFormat(
+        "createMCAsmInfo failed for triple '%s'", triple.getArchName()));
+  }
+
+  disassembler->sti_ = absl::WrapUnique(target->createMCSubtargetInfo(
+      triple.getTriple(), /*CPU=*/"", /*Features=*/""));
+  if (disassembler->sti_ == nullptr) {
+    return absl::FailedPreconditionError(absl::StrFormat(
+        "createMCSubtargetInfo failed for triple '%s'", triple.getArchName()));
+  }
+
+  disassembler->mii_ = absl::WrapUnique(target->createMCInstrInfo());
+  if (disassembler->mii_ == nullptr) {
+    return absl::FailedPreconditionError(absl::StrFormat(
+        "createMCInstrInfo failed for triple '%s'", triple.getArchName()));
+  }
+
+  disassembler->mia_ =
+      absl::WrapUnique(target->createMCInstrAnalysis(disassembler->mii_.get()));
+  if (disassembler->mia_ == nullptr) {
+    return absl::FailedPreconditionError(absl::StrFormat(
+        "createMCInstrAnalysis failed for triple '%s'", triple.getArchName()));
+  }
+
+  disassembler->ctx_ = std::make_unique<llvm::MCContext>(
+      triple, disassembler->asm_info_.get(), disassembler->mri_.get(),
+      disassembler->sti_.get());
+  disassembler->disasm_ = absl::WrapUnique(
+      target->createMCDisassembler(*disassembler->sti_, *disassembler->ctx_));
+  if (disassembler->disasm_ == nullptr)
+    return absl::FailedPreconditionError(
+        absl::StrFormat("createMCDisassembler failed"));
+
+  return disassembler;
+}
+
+absl::StatusOr<llvm::MCInst> MiniDisassembler::DisassembleOne(
+    uint64_t binary_address) {
+  for (const auto &section : object_file_->sections()) {
+    if (!section.isText() || section.isVirtual()) {
+      continue;
+    }
+    if (binary_address < section.getAddress() ||
+        binary_address >= section.getAddress() + section.getSize()) {
+      continue;
+    }
+    llvm::Expected<llvm::StringRef> content = section.getContents();
+    if (!content) {
+      return absl::FailedPreconditionError("section has no content");
+    }
+    llvm::ArrayRef<uint8_t> content_bytes(
+        reinterpret_cast<const uint8_t *>(content->data()), content->size());
+    uint64_t section_offset = binary_address - section.getAddress();
+    llvm::MCInst inst;
+    uint64_t size;
+    if (!disasm_->getInstruction(inst, size,
+                                 content_bytes.slice(section_offset),
+                                 binary_address, llvm::nulls())) {
+      return absl::FailedPreconditionError(absl::StrFormat(
+          "getInstruction failed at binary address 0x%lx", binary_address));
+    }
+    return inst;
+  }
+  return absl::FailedPreconditionError(absl::StrFormat(
+      "no section containing address 0x%lx found", binary_address));
+}
+
+bool MiniDisassembler::MayAffectControlFlow(const llvm::MCInst &inst) {
+  return mii_->get(inst.getOpcode()).mayAffectControlFlow(inst, *mri_);
+}
+
+llvm::StringRef MiniDisassembler::GetInstructionName(
+    const llvm::MCInst &inst) const {
+  return mii_->getName(inst.getOpcode());
+}
+
+absl::StatusOr<bool> MiniDisassembler::MayAffectControlFlow(
+    uint64_t binary_address) {
+  auto inst = DisassembleOne(binary_address);
+  if (!inst.ok()) return inst.status();
+  return MayAffectControlFlow(inst.value());
+}
+}  // namespace propeller
diff --git a/propeller/mini_disassembler.h b/propeller/mini_disassembler.h
new file mode 100644
index 00000000000..c544dac6315
--- /dev/null
+++ b/propeller/mini_disassembler.h
@@ -0,0 +1,63 @@
+#ifndef PROPELLER_MINI_DISASSEMBLER_H_
+#define PROPELLER_MINI_DISASSEMBLER_H_
+
+#include <cstdint>
+#include <memory>
+
+#include "absl/base/nullability.h"
+#include "absl/status/statusor.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/ObjectFile.h"
+
+namespace propeller {
+class MiniDisassembler {
+ public:
+  // Creates a MiniDisassembler for `object_file`. Does not take ownership of
+  // `object_file`, which must point to a valid object that outlives the
+  // `MiniDisassembler`.
+  static absl::StatusOr<absl::Nonnull<std::unique_ptr<MiniDisassembler>>>
+  Create(const llvm::object::ObjectFile *object_file);
+
+  MiniDisassembler(const MiniDisassembler &) = delete;
+  MiniDisassembler(MiniDisassembler &&) = delete;
+
+  MiniDisassembler &operator=(const MiniDisassembler &) = delete;
+  MiniDisassembler &operator=(MiniDisassembler &&) = delete;
+
+  // Disassembles the single instruction located at `binary_address`. Only
+  // non-virtual text sections are searched. Returns an error if no such
+  // section contains the address or if the bytes cannot be decoded.
+  absl::StatusOr<llvm::MCInst> DisassembleOne(uint64_t binary_address);
+  // Returns whether `inst` may affect control flow, as reported by the
+  // target's instruction description for its opcode.
+  bool MayAffectControlFlow(const llvm::MCInst &inst);
+  // Returns the target's name for the opcode of `inst`.
+  llvm::StringRef GetInstructionName(const llvm::MCInst &inst) const;
+  // Disassembles the instruction at `binary_address` and returns whether it
+  // may affect control flow, or the error from disassembling it.
+  absl::StatusOr<bool> MayAffectControlFlow(uint64_t binary_address);
+
+ private:
+  explicit MiniDisassembler(const llvm::object::ObjectFile *object_file)
+      : object_file_(object_file) {}
+
+  const llvm::object::ObjectFile *object_file_;
+  std::unique_ptr<llvm::MCRegisterInfo> mri_;
+  std::unique_ptr<llvm::MCAsmInfo> asm_info_;
+  std::unique_ptr<llvm::MCSubtargetInfo> sti_;
+  std::unique_ptr<llvm::MCInstrInfo> mii_;
+  std::unique_ptr<llvm::MCContext> ctx_;
+  std::unique_ptr<llvm::MCInstrAnalysis> mia_;
+  std::unique_ptr<llvm::MCDisassembler> disasm_;
+};
+}  // namespace propeller
+
+#endif  // PROPELLER_MINI_DISASSEMBLER_H_
diff --git a/propeller/mini_disassembler_test.cc b/propeller/mini_disassembler_test.cc
new file mode 100644
index 00000000000..c98342c2c2c
--- /dev/null
+++ b/propeller/mini_disassembler_test.cc
@@ -0,0 +1,104 @@
+#include "propeller/mini_disassembler.h"
+
+#include <memory>
+#include <string>
+
+#include "propeller/status_testing_macros.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/status/status_matchers.h"
+#include "absl/strings/str_cat.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h"
+#include "propeller/binary_content.h"
+
+namespace propeller {
+namespace {
+using ::absl_testing::IsOk;
+using ::absl_testing::IsOkAndHolds;
+using ::testing::Not;
+
+TEST(MiniDisassemblerTest, DisassembleOne) {
+  const std::string binary =
+      absl::StrCat(::testing::SrcDir(),
+                   "_main/propeller/testdata/"
+                   "llvm_function_samples.binary");
+  ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
+                       GetBinaryContent(binary));
+  ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<MiniDisassembler> md,
+      MiniDisassembler::Create(binary_content->object_file.get()));
+  ASSERT_OK_AND_ASSIGN(llvm::MCInst inst, md->DisassembleOne(0x4008e4));
+  EXPECT_EQ(inst.getOpcode(), llvm::X86::RET64);
+}
+
+TEST(MiniDisassemblerTest, DisassembleOneFailure) {
+  const std::string binary =
+      absl::StrCat(::testing::SrcDir(),
+                   "_main/propeller/testdata/"
+                   "llvm_function_samples.binary");
+  ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
+                       GetBinaryContent(binary));
+  ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<MiniDisassembler> md,
+      MiniDisassembler::Create(binary_content->object_file.get()));
+  EXPECT_THAT(md->DisassembleOne(0x999999999), Not(IsOk()));
+}
+
+TEST(MiniDisassemblerTest, RetMayAffectControlFlow) {
+  const std::string binary =
+      absl::StrCat(::testing::SrcDir(),
+                   "_main/propeller/testdata/"
+                   "llvm_function_samples.binary");
+  ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
+                       GetBinaryContent(binary));
+  ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<MiniDisassembler> md,
+      MiniDisassembler::Create(binary_content->object_file.get()));
+  ASSERT_OK_AND_ASSIGN(llvm::MCInst ret_inst, md->DisassembleOne(0x4008e4));
+  EXPECT_TRUE(md->MayAffectControlFlow(ret_inst));
+}
+
+TEST(MiniDisassemblerTest, CallMayAffectControlFlow) {
+  const std::string binary =
+      absl::StrCat(::testing::SrcDir(),
+                   "_main/propeller/testdata/"
+                   "llvm_function_samples.binary");
+  ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
+                       GetBinaryContent(binary));
+  ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<MiniDisassembler> md,
+      MiniDisassembler::Create(binary_content->object_file.get()));
+  ASSERT_OK_AND_ASSIGN(llvm::MCInst call_inst, md->DisassembleOne(0x4008c9));
+  EXPECT_TRUE(md->MayAffectControlFlow(call_inst));
+}
+
+TEST(MiniDisassemblerTest, BranchMayAffectControlFlow) {
+  const std::string binary =
+      absl::StrCat(::testing::SrcDir(),
+                   "_main/propeller/testdata/"
+                   "llvm_function_samples.binary");
+  ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
+                       GetBinaryContent(binary));
+  ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<MiniDisassembler> md,
+      MiniDisassembler::Create(binary_content->object_file.get()));
+  EXPECT_THAT(md->MayAffectControlFlow(0x4008b6), IsOkAndHolds(true));
+}
+
+TEST(MiniDisassemblerTest, PushMayNotAffectControlFlow) {
+  const std::string binary =
+      absl::StrCat(::testing::SrcDir(),
+                   "_main/propeller/testdata/"
+                   "llvm_function_samples.binary");
+  ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
+                       GetBinaryContent(binary));
+  ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<MiniDisassembler> md,
+      MiniDisassembler::Create(binary_content->object_file.get()));
+  ASSERT_OK_AND_ASSIGN(llvm::MCInst push_inst, md->DisassembleOne(0x400590));
+  EXPECT_FALSE(md->MayAffectControlFlow(push_inst));
+}
+
+}  // namespace
+}  // namespace propeller
diff --git a/propeller/testdata/BUILD b/propeller/testdata/BUILD
index bbe04d83b4a..4361498de69 100644
--- a/propeller/testdata/BUILD
+++ b/propeller/testdata/BUILD
@@ -122,7 +122,7 @@ genrule(
         "propeller_sample_1.perfdata2.gen",
     ],
     cmd = "$(CC) $(CC_FLAGS) -O0 -gsplit-dwarf=split -gmlt -Wl,-build-id -pie -fbasic-block-sections=labels $< -o $(RULEDIR)/propeller_sample_1.bin.gen && mv propeller_sample_1.dwo $(RULEDIR)/ && " +
-          "$(location //third_party/llvm/llvm-project/llvm:llvm-dwp) -e $(RULEDIR)/propeller_sample_1.bin.gen -o $(RULEDIR)/propeller_sample_1.dwp.gen $(RULEDIR)/propeller_sample_1.dwo && " +
+          "$(location //llvm:llvm-dwp) -e $(RULEDIR)/propeller_sample_1.bin.gen -o $(RULEDIR)/propeller_sample_1.dwp.gen $(RULEDIR)/propeller_sample_1.dwo && " +
           "cd $(RULEDIR) ; /usr/bin/perf record -o propeller_sample_1.perfdata1.gen -e cycles -b -- ./propeller_sample_1.bin.gen ;" +
           "/usr/bin/perf record -o propeller_sample_1.perfdata2.gen -e cycles -b -- ./propeller_sample_1.bin.gen 1 2 3 4",
     tags = [
diff --git a/propeller/testdata/sample.arm.bin b/propeller/testdata/sample.arm.bin
index e9512b9f675..782839e0f00 100755
Binary files a/propeller/testdata/sample.arm.bin and b/propeller/testdata/sample.arm.bin differ