Skip to content

Commit

Permalink
Add a library for disassembling machine instructions at a given address
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 700453553
  • Loading branch information
dhoekwater authored and copybara-github committed Nov 27, 2024
1 parent 2ad32b5 commit 9c3763f
Show file tree
Hide file tree
Showing 6 changed files with 358 additions and 1 deletion.
14 changes: 13 additions & 1 deletion CMake/LLVM/LLVM.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,16 @@ endif()

# Add the external llvm project to the build.
add_subdirectory(${propeller_llvm_src_dir}/llvm ${propeller_llvm_build_dir}/llvm EXCLUDE_FROM_ALL)
include_directories(${propeller_llvm_src_dir}/llvm/include ${propeller_llvm_build_dir}/llvm/include)
# Header search paths: the roots of the LLVM source and build trees, plus the
# llvm/include directory of each.
include_directories(
${propeller_llvm_src_dir}
${propeller_llvm_src_dir}/llvm/include
${propeller_llvm_build_dir}
${propeller_llvm_build_dir}/llvm/include
)
# Add the target-specific library directories to the header search path: for
# every entry of LLVM_TARGETS_TO_BUILD, both the source-tree and the build-tree
# (generated) llvm/lib/Target/<target> directory.
foreach (tgt ${LLVM_TARGETS_TO_BUILD})
include_directories(
${propeller_llvm_src_dir}/llvm/lib/Target/${tgt}
${propeller_llvm_build_dir}/llvm/lib/Target/${tgt}
)
endforeach()
41 changes: 41 additions & 0 deletions propeller/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,29 @@ cc_library(
],
)

# Library for disassembling machine instructions at a given address.
cc_library(
name = "mini_disassembler",
srcs = ["mini_disassembler.cc"],
hdrs = ["mini_disassembler.h"],
deps = [
"@abseil-cpp//absl/base:nullability",
"@abseil-cpp//absl/log",
"@abseil-cpp//absl/log:check",
"@abseil-cpp//absl/memory",
"@abseil-cpp//absl/status",
"@abseil-cpp//absl/status:statusor",
"@abseil-cpp//absl/strings",
"@abseil-cpp//absl/strings:str_format",
# The two "AllTargets" deps below are pinned with "buildcleaner: keep";
# presumably they back the llvm::InitializeAll* calls made in
# mini_disassembler.cc -- confirm before removing.
"@llvm-project//llvm:AllTargetsAsmParsers", # buildcleaner: keep
"@llvm-project//llvm:AllTargetsDisassemblers", # buildcleaner: keep
"@llvm-project//llvm:MC",
"@llvm-project//llvm:MCDisassembler",
"@llvm-project//llvm:Object",
"@llvm-project//llvm:Support",
"@llvm-project//llvm:TargetParser",
],
)

########################
# Tests & Test Utils #
########################
Expand Down Expand Up @@ -1125,3 +1148,21 @@ cc_test(
"@com_google_googletest//:gtest_main",
],
)

# Unit tests for :mini_disassembler. They read the llvm_function_samples.binary
# test data file at run time.
cc_test(
name = "mini_disassembler_test",
srcs = ["mini_disassembler_test.cc"],
data = [
"//propeller/testdata:llvm_function_samples.binary",
],
deps = [
":binary_content",
":mini_disassembler",
":status_testing_macros",
"@abseil-cpp//absl/status:status_matchers",
"@abseil-cpp//absl/strings",
"@com_google_googletest//:gtest_main",
"@llvm-project//llvm:MC",
# Provides the X86 opcode definitions (llvm::X86::*) used by the test.
"@llvm-project//llvm:X86UtilsAndDesc",
],
)
6 changes: 6 additions & 0 deletions propeller/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ add_library(propeller_lib OBJECT
file_perf_data_provider.cc
frequencies_branch_aggregator.cc
lbr_branch_aggregator.cc
mini_disassembler.cc
node_chain.cc
node_chain_assembly.cc
node_chain_builder.cc
Expand All @@ -66,6 +67,11 @@ target_link_libraries(propeller_lib
quipper_protos
# keep-sorted end
)
# Link propeller_lib against the per-target LLVM component libraries
# (LLVM<target>AsmParser, LLVM<target>Desc, LLVM<target>Disassembler and
# LLVM<target>Info) for every entry of LLVM_TARGETS_TO_BUILD.
# NOTE(review): this assumes each configured target provides all four
# components; confirm for targets that ship without an assembly parser or a
# disassembler.
foreach (tgt ${LLVM_TARGETS_TO_BUILD})
foreach (tool AsmParser Desc Disassembler Info)
target_link_libraries(propeller_lib LLVM${tgt}${tool})
endforeach()
endforeach()

# Build all CXX test utilities into a unified library.
add_library(propeller_test_lib OBJECT
Expand Down
139 changes: 139 additions & 0 deletions propeller/mini_disassembler.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#include "propeller/mini_disassembler.h"

#include <cstdint>
#include <memory>
#include <string>

#include "absl/base/nullability.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"

namespace propeller {
// Builds a MiniDisassembler for the architecture reported by `object_file`.
//
// The LLVM MC components are created one after another (register info, asm
// info, subtarget info, instruction info, instruction analysis, context and
// finally the disassembler). As soon as one of them cannot be created, a
// FailedPrecondition status naming the failing step is returned.
absl::StatusOr<absl::Nonnull<std::unique_ptr<MiniDisassembler>>>
MiniDisassembler::Create(const llvm::object::ObjectFile *object_file) {
  // The constructor is private, so the object is allocated directly and then
  // wrapped.
  auto result =
      absl::WrapUnique<MiniDisassembler>(new MiniDisassembler(object_file));
  MiniDisassembler &md = *result;

  std::string lookup_error;
  llvm::InitializeAllTargetInfos();
  llvm::InitializeAllTargetMCs();
  llvm::InitializeAllAsmParsers();
  llvm::InitializeAllDisassemblers();

  // Only the architecture of the triple is populated; it is taken from the
  // object file.
  llvm::Triple triple;
  triple.setArch(llvm::Triple::ArchType(object_file->getArch()));

  const llvm::Target *target =
      llvm::TargetRegistry::lookupTarget(triple.normalize(), lookup_error);
  if (target == nullptr) {
    return absl::FailedPreconditionError(
        absl::StrFormat("no target for triple '%s': %s", triple.getArchName(),
                        lookup_error));
  }

  md.mri_ = absl::WrapUnique(target->createMCRegInfo(triple.getTriple()));
  if (!md.mri_) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCRegInfo failed for triple '%s'", triple.getArchName()));
  }

  md.asm_info_ = absl::WrapUnique(target->createMCAsmInfo(
      *md.mri_, triple.getTriple(), llvm::MCTargetOptions()));
  if (!md.asm_info_) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCAsmInfo failed for triple '%s'", triple.getArchName()));
  }

  md.sti_ = absl::WrapUnique(target->createMCSubtargetInfo(
      triple.getTriple(), /*CPU=*/"", /*Features=*/""));
  if (!md.sti_) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCSubtargetInfo failed for triple '%s'", triple.getArchName()));
  }

  md.mii_ = absl::WrapUnique(target->createMCInstrInfo());
  if (!md.mii_) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCInstrInfo failed for triple '%s'", triple.getArchName()));
  }

  md.mia_ = absl::WrapUnique(target->createMCInstrAnalysis(md.mii_.get()));
  if (!md.mia_) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCInstrAnalysis failed for triple '%s'", triple.getArchName()));
  }

  // The context is handed the asm, register and subtarget info created above;
  // the disassembler is in turn created from the subtarget info and context.
  md.ctx_ = std::make_unique<llvm::MCContext>(triple, md.asm_info_.get(),
                                              md.mri_.get(), md.sti_.get());
  md.disasm_ =
      absl::WrapUnique(target->createMCDisassembler(*md.sti_, *md.ctx_));
  if (!md.disasm_) {
    return absl::FailedPreconditionError(
        absl::StrFormat("createMCDisassembler failed"));
  }

  return result;
}

// Disassembles the single instruction located at `binary_address`.
//
// Scans the object file for a text, non-virtual section whose address range
// contains `binary_address` and decodes one instruction starting at that
// address.
//
// Returns the decoded instruction, or a FailedPrecondition status when
//   * no such section contains `binary_address`,
//   * the contents of the containing section cannot be read, or
//   * the bytes at `binary_address` cannot be decoded.
absl::StatusOr<llvm::MCInst> MiniDisassembler::DisassembleOne(
    uint64_t binary_address) {
  for (const auto &section : object_file_->sections()) {
    if (!section.isText() || section.isVirtual()) {
      continue;
    }
    const uint64_t section_start = section.getAddress();
    if (binary_address < section_start) {
      continue;
    }
    // Compare offsets instead of end addresses so that a section ending at the
    // very top of the address space cannot make `start + size` wrap around.
    const uint64_t section_offset = binary_address - section_start;
    if (section_offset >= section.getSize()) {
      continue;
    }

    llvm::Expected<llvm::StringRef> content = section.getContents();
    if (!content) {
      // An llvm::Expected holding an error must have that error consumed
      // before it is destroyed; otherwise LLVM builds with ABI-breaking checks
      // enabled abort. Converting it to a string both consumes it and keeps
      // the underlying cause in the returned status.
      return absl::FailedPreconditionError(
          absl::StrFormat("section has no content: %s",
                          llvm::toString(content.takeError())));
    }
    llvm::ArrayRef<uint8_t> content_bytes(
        reinterpret_cast<const uint8_t *>(content->data()), content->size());

    llvm::MCInst inst;
    uint64_t size = 0;  // Filled in by getInstruction(); not used afterwards.
    // getInstruction() returns a DecodeStatus whose failure value is zero, so
    // negating it detects a hard decode failure.
    if (!disasm_->getInstruction(inst, size,
                                 content_bytes.slice(section_offset),
                                 binary_address, llvm::nulls())) {
      return absl::FailedPreconditionError(absl::StrFormat(
          "getInstruction failed at binary address 0x%lx", binary_address));
    }
    return inst;
  }
  return absl::FailedPreconditionError(absl::StrFormat(
      "no section containing address 0x%lx found", binary_address));
}

// Reports whether `inst` may affect control flow, as determined by the
// instruction description registered for its opcode.
bool MiniDisassembler::MayAffectControlFlow(const llvm::MCInst &inst) {
  const llvm::MCInstrDesc &desc = mii_->get(inst.getOpcode());
  return desc.mayAffectControlFlow(inst, *mri_);
}

// Returns the name that the instruction info table associates with the opcode
// of `inst`.
llvm::StringRef MiniDisassembler::GetInstructionName(
    const llvm::MCInst &inst) const {
  const unsigned opcode = inst.getOpcode();
  return mii_->getName(opcode);
}

// Disassembles the instruction at `binary_address` and reports whether it may
// affect control flow. A disassembly failure is propagated as the returned
// status.
absl::StatusOr<bool> MiniDisassembler::MayAffectControlFlow(
    uint64_t binary_address) {
  absl::StatusOr<llvm::MCInst> decoded = DisassembleOne(binary_address);
  if (decoded.ok()) {
    return MayAffectControlFlow(*decoded);
  }
  return decoded.status();
}
} // namespace propeller
55 changes: 55 additions & 0 deletions propeller/mini_disassembler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#ifndef PROPELLER_MINI_DISASSEMBLER_H_
#define PROPELLER_MINI_DISASSEMBLER_H_

#include <cstdint>
#include <memory>

#include "absl/base/nullability.h"
#include "absl/status/statusor.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/ObjectFile.h"

namespace propeller {
// Disassembles individual machine instructions of an object file, addressed
// by their binary address, and answers simple queries about them.
class MiniDisassembler {
public:
// Creates a MiniDisassembler for `object_file`. Does not take ownership of
// `object_file`, which must point to a valid object that outlives the
// `MiniDisassembler`.
static absl::StatusOr<absl::Nonnull<std::unique_ptr<MiniDisassembler>>>
Create(const llvm::object::ObjectFile *object_file);

// Neither copyable nor movable.
MiniDisassembler(const MiniDisassembler &) = delete;
MiniDisassembler(MiniDisassembler &&) = delete;

MiniDisassembler &operator=(const MiniDisassembler &) = delete;
MiniDisassembler &operator=(MiniDisassembler &&) = delete;

// Disassembles the instruction located at `binary_address`. Returns an error
// status if no text section of the object file contains the address or if
// the instruction cannot be decoded.
absl::StatusOr<llvm::MCInst> DisassembleOne(uint64_t binary_address);
// Returns whether `inst` may affect control flow according to the
// instruction description of its opcode.
bool MayAffectControlFlow(const llvm::MCInst &inst);
// Returns the instruction-info name for the opcode of `inst`.
llvm::StringRef GetInstructionName(const llvm::MCInst &inst) const;
// Disassembles the instruction at `binary_address` and returns whether it may
// affect control flow; a disassembly failure is returned as an error status.
absl::StatusOr<bool> MayAffectControlFlow(uint64_t binary_address);

private:
// Only `Create` constructs instances; it fills in the LLVM members below.
explicit MiniDisassembler(const llvm::object::ObjectFile *object_file)
: object_file_(object_file) {}

// Not owned.
const llvm::object::ObjectFile *object_file_;
// Members are destroyed in reverse declaration order, so `ctx_` and `disasm_`
// are destroyed before the objects they were created from (`asm_info_`,
// `mri_`, `sti_`). Keep this order when adding or moving members.
std::unique_ptr<const llvm::MCRegisterInfo> mri_;
std::unique_ptr<const llvm::MCAsmInfo> asm_info_;
std::unique_ptr<const llvm::MCSubtargetInfo> sti_;
std::unique_ptr<const llvm::MCInstrInfo> mii_;
std::unique_ptr<llvm::MCContext> ctx_;
std::unique_ptr<const llvm::MCInstrAnalysis> mia_;
std::unique_ptr<const llvm::MCDisassembler> disasm_;
};
} // namespace propeller

#endif // PROPELLER_MINI_DISASSEMBLER_H_
104 changes: 104 additions & 0 deletions propeller/mini_disassembler_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#include "propeller/mini_disassembler.h"

#include <memory>
#include <string>

#include "propeller/status_testing_macros.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/status/status_matchers.h"
#include "absl/strings/str_cat.h"
#include "llvm/MC/MCInst.h"
#include "lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h"
#include "propeller/binary_content.h"

namespace propeller {
namespace {
using ::absl_testing::IsOk;
using ::absl_testing::IsOkAndHolds;
using ::testing::Not;

TEST(MiniDisassemblerTest, DisassembleOne) {
const std::string binary =
absl::StrCat(::testing::SrcDir(),
"_main/propeller/testdata/"
"llvm_function_samples.binary");
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(binary));
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<MiniDisassembler> md,
MiniDisassembler::Create(binary_content->object_file.get()));
ASSERT_OK_AND_ASSIGN(llvm::MCInst inst, md->DisassembleOne(0x4008e4));
EXPECT_EQ(inst.getOpcode(), llvm::X86::RET64);
}

TEST(MiniDisassemblerTest, DisassembleOneFailure) {
const std::string binary =
absl::StrCat(::testing::SrcDir(),
"_main/propeller/testdata/"
"llvm_function_samples.binary");
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(binary));
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<MiniDisassembler> md,
MiniDisassembler::Create(binary_content->object_file.get()));
EXPECT_THAT(md->DisassembleOne(0x999999999), Not(IsOk()));
}

TEST(MiniDisassemblerTest, RetMayAffectControlFlow) {
const std::string binary =
absl::StrCat(::testing::SrcDir(),
"_main/propeller/testdata/"
"llvm_function_samples.binary");
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(binary));
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<MiniDisassembler> md,
MiniDisassembler::Create(binary_content->object_file.get()));
ASSERT_OK_AND_ASSIGN(llvm::MCInst ret_inst, md->DisassembleOne(0x4008e4));
EXPECT_TRUE(md->MayAffectControlFlow(ret_inst));
}

TEST(MiniDisassemblerTest, CallMayAffectControlFlow) {
const std::string binary =
absl::StrCat(::testing::SrcDir(),
"_main/propeller/testdata/"
"llvm_function_samples.binary");
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(binary));
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<MiniDisassembler> md,
MiniDisassembler::Create(binary_content->object_file.get()));
ASSERT_OK_AND_ASSIGN(llvm::MCInst call_inst, md->DisassembleOne(0x4008c9));
EXPECT_TRUE(md->MayAffectControlFlow(call_inst));
}

TEST(MiniDisassemblerTest, BranchMayAffectControlFlow) {
const std::string binary =
absl::StrCat(::testing::SrcDir(),
"_main/propeller/testdata/"
"llvm_function_samples.binary");
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(binary));
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<MiniDisassembler> md,
MiniDisassembler::Create(binary_content->object_file.get()));
EXPECT_THAT(md->MayAffectControlFlow(0x4008b6), IsOkAndHolds(true));
}

TEST(MiniDisassemblerTest, PushMayNotAffectControlFlow) {
const std::string binary =
absl::StrCat(::testing::SrcDir(),
"_main/propeller/testdata/"
"llvm_function_samples.binary");
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(binary));
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<MiniDisassembler> md,
MiniDisassembler::Create(binary_content->object_file.get()));
ASSERT_OK_AND_ASSIGN(llvm::MCInst push_inst, md->DisassembleOne(0x400590));
EXPECT_FALSE(md->MayAffectControlFlow(push_inst));
}

} // namespace
} // namespace propeller

0 comments on commit 9c3763f

Please sign in to comment.