Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a library for disassembling machine instructions at a given address #214

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion CMake/LLVM/LLVM.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,16 @@ endif()

# Add the external llvm project to the build.
add_subdirectory(${propeller_llvm_src_dir}/llvm ${propeller_llvm_build_dir}/llvm EXCLUDE_FROM_ALL)
include_directories(${propeller_llvm_src_dir}/llvm/include ${propeller_llvm_build_dir}/llvm/include)
include_directories(
${propeller_llvm_src_dir}
${propeller_llvm_src_dir}/llvm/include
${propeller_llvm_build_dir}
${propeller_llvm_build_dir}/llvm/include
)
# Expose the per-target LLVM directories as include paths: for each target in
# LLVM_TARGETS_TO_BUILD, first the source-tree `lib/Target/<target>` directory,
# then the matching build-tree directory.
foreach (tgt ${LLVM_TARGETS_TO_BUILD})
  include_directories(${propeller_llvm_src_dir}/llvm/lib/Target/${tgt})
  include_directories(${propeller_llvm_build_dir}/llvm/lib/Target/${tgt})
endforeach()
41 changes: 41 additions & 0 deletions propeller/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,29 @@ cc_library(
],
)

# Library for disassembling the machine instruction at a given address of an
# object file (see mini_disassembler.h).
cc_library(
name = "mini_disassembler",
srcs = ["mini_disassembler.cc"],
hdrs = ["mini_disassembler.h"],
deps = [
"@abseil-cpp//absl/base:nullability",
"@abseil-cpp//absl/log",
"@abseil-cpp//absl/log:check",
"@abseil-cpp//absl/memory",
"@abseil-cpp//absl/status",
"@abseil-cpp//absl/status:statusor",
"@abseil-cpp//absl/strings",
"@abseil-cpp//absl/strings:str_format",
# NOTE(review): the two "AllTargets*" deps are pinned with `buildcleaner: keep`;
# presumably they supply the per-target code reached through the
# llvm::InitializeAll*() calls in mini_disassembler.cc rather than through any
# direct #include -- confirm before removing.
"@llvm-project//llvm:AllTargetsAsmParsers", # buildcleaner: keep
"@llvm-project//llvm:AllTargetsDisassemblers", # buildcleaner: keep
"@llvm-project//llvm:MC",
"@llvm-project//llvm:MCDisassembler",
"@llvm-project//llvm:Object",
"@llvm-project//llvm:Support",
"@llvm-project//llvm:TargetParser",
],
)

########################
# Tests & Test Utils #
########################
Expand Down Expand Up @@ -1125,3 +1148,21 @@ cc_test(
"@com_google_googletest//:gtest_main",
],
)

# Unit tests for :mini_disassembler. They disassemble instructions at fixed
# addresses inside the checked-in llvm_function_samples.binary test binary.
cc_test(
name = "mini_disassembler_test",
srcs = ["mini_disassembler_test.cc"],
# Sample binary loaded by the tests at run time.
data = [
"//propeller/testdata:llvm_function_samples.binary",
],
deps = [
":binary_content",
":mini_disassembler",
":status_testing_macros",
"@abseil-cpp//absl/status:status_matchers",
"@abseil-cpp//absl/strings",
"@com_google_googletest//:gtest_main",
"@llvm-project//llvm:MC",
# The test includes X86MCTargetDesc.h to compare against X86 opcode enumerators.
"@llvm-project//llvm:X86UtilsAndDesc",
],
)
6 changes: 6 additions & 0 deletions propeller/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ add_library(propeller_lib OBJECT
file_perf_data_provider.cc
frequencies_branch_aggregator.cc
lbr_branch_aggregator.cc
mini_disassembler.cc
node_chain.cc
node_chain_assembly.cc
node_chain_builder.cc
Expand All @@ -66,6 +67,11 @@ target_link_libraries(propeller_lib
quipper_protos
# keep-sorted end
)
# Link the per-target LLVM component libraries (assembly parser, MC
# description, disassembler and target info) for every configured LLVM target.
#
# Not every LLVM target defines every component library (some targets have no
# AsmParser or no Disassembler), so a component is linked only when a CMake
# target of that name exists; otherwise the bare name would be handed to the
# linker as a library and fail the link.
# NOTE(review): this assumes the LLVM subproject has already been added (so its
# targets are defined) by the time this file is processed -- confirm.
foreach (tgt ${LLVM_TARGETS_TO_BUILD})
  foreach (tool AsmParser Desc Disassembler Info)
    if (TARGET LLVM${tgt}${tool})
      target_link_libraries(propeller_lib LLVM${tgt}${tool})
    endif()
  endforeach()
endforeach()

# Build all CXX test utilities into a unified library.
add_library(propeller_test_lib OBJECT
Expand Down
139 changes: 139 additions & 0 deletions propeller/mini_disassembler.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#include "propeller/mini_disassembler.h"

#include <cstdint>
#include <memory>
#include <string>

#include "absl/base/nullability.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"

namespace propeller {
// Builds a MiniDisassembler for `object_file`.
//
// Registers all LLVM targets, looks up the target matching the object file's
// architecture, and creates the MC objects (register info, asm info,
// subtarget info, instruction info, instruction analysis, context and
// disassembler) that the instance needs.
//
// Returns:
//   - InvalidArgumentError if `object_file` is null.
//   - FailedPreconditionError if the target lookup or the creation of any MC
//     object fails.
//   - Otherwise, the fully initialized disassembler.
absl::StatusOr<absl::Nonnull<std::unique_ptr<MiniDisassembler>>>
MiniDisassembler::Create(const llvm::object::ObjectFile *object_file) {
  // `object_file` is dereferenced below; reject null instead of crashing.
  if (object_file == nullptr) {
    return absl::InvalidArgumentError("object_file must not be null");
  }

  auto disassembler =
      absl::WrapUnique<MiniDisassembler>(new MiniDisassembler(object_file));

  std::string err;
  llvm::InitializeAllTargetInfos();
  llvm::InitializeAllTargetMCs();
  llvm::InitializeAllAsmParsers();
  llvm::InitializeAllDisassemblers();

  // Only the architecture is taken from the object file; the remaining triple
  // components are left at their defaults.
  llvm::Triple triple;
  triple.setArch(llvm::Triple::ArchType(object_file->getArch()));
  const llvm::Target *target =
      llvm::TargetRegistry::lookupTarget(triple.normalize(), err);
  if (target == nullptr) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "no target for triple '%s': %s", triple.getArchName(), err));
  }

  disassembler->mri_ =
      absl::WrapUnique(target->createMCRegInfo(triple.getTriple()));
  if (disassembler->mri_ == nullptr) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCRegInfo failed for triple '%s'", triple.getArchName()));
  }

  disassembler->asm_info_ = absl::WrapUnique(target->createMCAsmInfo(
      *disassembler->mri_, triple.getTriple(), llvm::MCTargetOptions()));
  if (disassembler->asm_info_ == nullptr) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCAsmInfo failed for triple '%s'", triple.getArchName()));
  }

  disassembler->sti_ = absl::WrapUnique(target->createMCSubtargetInfo(
      triple.getTriple(), /*CPU=*/"", /*Features=*/""));
  if (disassembler->sti_ == nullptr) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCSubtargetInfo failed for triple '%s'", triple.getArchName()));
  }

  disassembler->mii_ = absl::WrapUnique(target->createMCInstrInfo());
  if (disassembler->mii_ == nullptr) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCInstrInfo failed for triple '%s'", triple.getArchName()));
  }

  disassembler->mia_ =
      absl::WrapUnique(target->createMCInstrAnalysis(disassembler->mii_.get()));
  if (disassembler->mia_ == nullptr) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCInstrAnalysis failed for triple '%s'", triple.getArchName()));
  }

  // The context and the disassembler are built from the objects created
  // above, so they must be created last.
  disassembler->ctx_ = std::make_unique<llvm::MCContext>(
      triple, disassembler->asm_info_.get(), disassembler->mri_.get(),
      disassembler->sti_.get());
  disassembler->disasm_ = absl::WrapUnique(
      target->createMCDisassembler(*disassembler->sti_, *disassembler->ctx_));
  if (disassembler->disasm_ == nullptr) {
    return absl::FailedPreconditionError("createMCDisassembler failed");
  }

  return disassembler;
}

// Disassembles the single machine instruction located at `binary_address`.
//
// Searches the object file for a non-virtual text section whose address range
// [address, address + size) contains `binary_address` and decodes one
// instruction from that section's contents at the corresponding offset.
//
// Returns FailedPreconditionError if no such section exists, if the section's
// contents cannot be read or do not cover the address, or if LLVM fails to
// decode an instruction there.
absl::StatusOr<llvm::MCInst> MiniDisassembler::DisassembleOne(
    uint64_t binary_address) {
  for (const auto &section : object_file_->sections()) {
    if (!section.isText() || section.isVirtual()) {
      continue;
    }
    const uint64_t section_address = section.getAddress();
    // Compare offsets rather than computing `address + size`, which could
    // wrap around for a section placed at the top of the address space.
    if (binary_address < section_address ||
        binary_address - section_address >= section.getSize()) {
      continue;
    }
    const uint64_t section_offset = binary_address - section_address;

    llvm::Expected<llvm::StringRef> content = section.getContents();
    if (!content) {
      // A failed llvm::Expected must have its error taken and consumed before
      // it is destroyed; converting it to a string does both.
      return absl::FailedPreconditionError(absl::StrFormat(
          "section has no content: %s", llvm::toString(content.takeError())));
    }
    // The contents may be shorter than the declared section size; slicing
    // past the end of the buffer is not allowed.
    if (section_offset >= content->size()) {
      return absl::FailedPreconditionError(absl::StrFormat(
          "binary address 0x%lx is beyond the contents of its section",
          binary_address));
    }

    llvm::ArrayRef<uint8_t> content_bytes(
        reinterpret_cast<const uint8_t *>(content->data()), content->size());
    llvm::MCInst inst;
    uint64_t size = 0;
    // Only an outright `Fail` is treated as an error; any other decode status
    // is accepted.
    if (disasm_->getInstruction(inst, size,
                                content_bytes.slice(section_offset),
                                binary_address, llvm::nulls()) ==
        llvm::MCDisassembler::Fail) {
      return absl::FailedPreconditionError(absl::StrFormat(
          "getInstruction failed at binary address 0x%lx", binary_address));
    }
    return inst;
  }
  return absl::FailedPreconditionError(absl::StrFormat(
      "no section containing address 0x%lx found", binary_address));
}

// Reports whether `inst` may affect control flow, as determined by the
// instruction descriptor registered for its opcode.
bool MiniDisassembler::MayAffectControlFlow(const llvm::MCInst &inst) {
  const llvm::MCInstrDesc &descriptor = mii_->get(inst.getOpcode());
  return descriptor.mayAffectControlFlow(inst, *mri_);
}

// Returns the name that the instruction info table records for the opcode of
// `inst`.
llvm::StringRef MiniDisassembler::GetInstructionName(
    const llvm::MCInst &inst) const {
  const unsigned opcode = inst.getOpcode();
  return mii_->getName(opcode);
}

// Disassembles the instruction at `binary_address` and reports whether it may
// affect control flow. Any disassembly error is returned unchanged.
absl::StatusOr<bool> MiniDisassembler::MayAffectControlFlow(
    uint64_t binary_address) {
  absl::StatusOr<llvm::MCInst> decoded = DisassembleOne(binary_address);
  if (decoded.ok()) {
    return MayAffectControlFlow(*decoded);
  }
  return decoded.status();
}
} // namespace propeller
55 changes: 55 additions & 0 deletions propeller/mini_disassembler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#ifndef PROPELLER_MINI_DISASSEMBLER_H_
#define PROPELLER_MINI_DISASSEMBLER_H_

#include <cstdint>
#include <memory>

#include "absl/base/nullability.h"
#include "absl/status/statusor.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/ObjectFile.h"

namespace propeller {
// Thin wrapper around LLVM's MC disassembler that decodes individual machine
// instructions of an object file by address and answers simple queries about
// the decoded instructions.
class MiniDisassembler {
public:
// Creates a MiniDisassembler for `object_file`. Does not take ownership of
// `object_file`, which must point to a valid object that outlives the
// `MiniDisassembler`.
// Returns an error status if no LLVM target is found for the object file's
// architecture or if any of the required LLVM MC objects cannot be created.
static absl::StatusOr<absl::Nonnull<std::unique_ptr<MiniDisassembler>>>
Create(const llvm::object::ObjectFile *object_file);

// Instances are neither copyable nor movable; they are handed out by
// `Create` behind a `std::unique_ptr`.
MiniDisassembler(const MiniDisassembler &) = delete;
MiniDisassembler(MiniDisassembler &&) = delete;

MiniDisassembler &operator=(const MiniDisassembler &) = delete;
MiniDisassembler &operator=(MiniDisassembler &&) = delete;

// Decodes the instruction at `binary_address`. Returns an error status if no
// non-virtual text section contains the address, if that section's contents
// cannot be read, or if the instruction cannot be decoded.
absl::StatusOr<llvm::MCInst> DisassembleOne(uint64_t binary_address);
// Returns whether the instruction descriptor for `inst`'s opcode reports that
// `inst` may affect control flow.
bool MayAffectControlFlow(const llvm::MCInst &inst);
// Returns the name recorded in the instruction info for `inst`'s opcode.
llvm::StringRef GetInstructionName(const llvm::MCInst &inst) const;
// Convenience overload: disassembles the instruction at `binary_address` and
// applies the `MCInst` overload to it, forwarding any disassembly error.
absl::StatusOr<bool> MayAffectControlFlow(uint64_t binary_address);

private:
// Only stores `object_file`; the remaining members are filled in by `Create`.
explicit MiniDisassembler(const llvm::object::ObjectFile *object_file)
: object_file_(object_file) {}

// Not owned.
const llvm::object::ObjectFile *object_file_;
// LLVM MC objects, all populated by `Create`. Keep the declaration order:
// members are destroyed in reverse order of declaration, and `Create` builds
// `ctx_`, `mia_` and `disasm_` from pointers/references to members declared
// before them.
std::unique_ptr<const llvm::MCRegisterInfo> mri_;
std::unique_ptr<const llvm::MCAsmInfo> asm_info_;
std::unique_ptr<const llvm::MCSubtargetInfo> sti_;
std::unique_ptr<const llvm::MCInstrInfo> mii_;
std::unique_ptr<llvm::MCContext> ctx_;
// Created by `Create`; no other use is visible in mini_disassembler.cc.
std::unique_ptr<const llvm::MCInstrAnalysis> mia_;
std::unique_ptr<const llvm::MCDisassembler> disasm_;
};
} // namespace propeller

#endif // PROPELLER_MINI_DISASSEMBLER_H_
104 changes: 104 additions & 0 deletions propeller/mini_disassembler_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#include "propeller/mini_disassembler.h"

#include <memory>
#include <string>

#include "propeller/status_testing_macros.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/status/status_matchers.h"
#include "absl/strings/str_cat.h"
#include "llvm/MC/MCInst.h"
#include "llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h"
#include "propeller/binary_content.h"

namespace propeller {
namespace {
using ::absl_testing::IsOk;
using ::absl_testing::IsOkAndHolds;
using ::testing::Not;

TEST(MiniDisassemblerTest, DisassembleOne) {
const std::string binary =
absl::StrCat(::testing::SrcDir(),
"_main/propeller/testdata/"
"llvm_function_samples.binary");
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(binary));
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<MiniDisassembler> md,
MiniDisassembler::Create(binary_content->object_file.get()));
ASSERT_OK_AND_ASSIGN(llvm::MCInst inst, md->DisassembleOne(0x4008e4));
EXPECT_EQ(inst.getOpcode(), llvm::X86::RET64);
}

TEST(MiniDisassemblerTest, DisassembleOneFailure) {
const std::string binary =
absl::StrCat(::testing::SrcDir(),
"_main/propeller/testdata/"
"llvm_function_samples.binary");
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(binary));
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<MiniDisassembler> md,
MiniDisassembler::Create(binary_content->object_file.get()));
EXPECT_THAT(md->DisassembleOne(0x999999999), Not(IsOk()));
}

TEST(MiniDisassemblerTest, RetMayAffectControlFlow) {
const std::string binary =
absl::StrCat(::testing::SrcDir(),
"_main/propeller/testdata/"
"llvm_function_samples.binary");
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(binary));
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<MiniDisassembler> md,
MiniDisassembler::Create(binary_content->object_file.get()));
ASSERT_OK_AND_ASSIGN(llvm::MCInst ret_inst, md->DisassembleOne(0x4008e4));
EXPECT_TRUE(md->MayAffectControlFlow(ret_inst));
}

TEST(MiniDisassemblerTest, CallMayAffectControlFlow) {
const std::string binary =
absl::StrCat(::testing::SrcDir(),
"_main/propeller/testdata/"
"llvm_function_samples.binary");
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(binary));
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<MiniDisassembler> md,
MiniDisassembler::Create(binary_content->object_file.get()));
ASSERT_OK_AND_ASSIGN(llvm::MCInst call_inst, md->DisassembleOne(0x4008c9));
EXPECT_TRUE(md->MayAffectControlFlow(call_inst));
}

TEST(MiniDisassemblerTest, BranchMayAffectControlFlow) {
const std::string binary =
absl::StrCat(::testing::SrcDir(),
"_main/propeller/testdata/"
"llvm_function_samples.binary");
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(binary));
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<MiniDisassembler> md,
MiniDisassembler::Create(binary_content->object_file.get()));
EXPECT_THAT(md->MayAffectControlFlow(0x4008b6), IsOkAndHolds(true));
}

TEST(MiniDisassemblerTest, PushMayNotAffectControlFlow) {
const std::string binary =
absl::StrCat(::testing::SrcDir(),
"_main/propeller/testdata/"
"llvm_function_samples.binary");
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(binary));
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<MiniDisassembler> md,
MiniDisassembler::Create(binary_content->object_file.get()));
ASSERT_OK_AND_ASSIGN(llvm::MCInst push_inst, md->DisassembleOne(0x400590));
EXPECT_FALSE(md->MayAffectControlFlow(push_inst));
}

} // namespace
} // namespace propeller
2 changes: 1 addition & 1 deletion propeller/testdata/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ genrule(
"propeller_sample_1.perfdata2.gen",
],
cmd = "$(CC) $(CC_FLAGS) -O0 -gsplit-dwarf=split -gmlt -Wl,-build-id -pie -fbasic-block-sections=labels $< -o $(RULEDIR)/propeller_sample_1.bin.gen && mv propeller_sample_1.dwo $(RULEDIR)/ && " +
"$(location //third_party/llvm/llvm-project/llvm:llvm-dwp) -e $(RULEDIR)/propeller_sample_1.bin.gen -o $(RULEDIR)/propeller_sample_1.dwp.gen $(RULEDIR)/propeller_sample_1.dwo && " +
"$(location //llvm:llvm-dwp) -e $(RULEDIR)/propeller_sample_1.bin.gen -o $(RULEDIR)/propeller_sample_1.dwp.gen $(RULEDIR)/propeller_sample_1.dwo && " +
"cd $(RULEDIR) ; /usr/bin/perf record -o propeller_sample_1.perfdata1.gen -e cycles -b -- ./propeller_sample_1.bin.gen ;" +
"/usr/bin/perf record -o propeller_sample_1.perfdata2.gen -e cycles -b -- ./propeller_sample_1.bin.gen 1 2 3 4",
tags = [
Expand Down
Binary file modified propeller/testdata/sample.arm.bin
Binary file not shown.