Skip to content

Commit dda5f55

Browse files
dhoekwater and copybara-github
authored and committed
Add a library for disassembling machine instructions at a given address
PiperOrigin-RevId: 700453553
1 parent c2434b0 commit dda5f55

7 files changed

+692
-0
lines changed

propeller/BUILD

+63
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,51 @@ cc_library(
627627
],
628628
)
629629

630+
# Library target built from profile_writer.cc with profile_writer.h as its
# header. Depends on the CFG/profile targets of this package plus Abseil and
# LLVM Support.
cc_library(
631+
name = "profile_writer",
632+
srcs = ["profile_writer.cc"],
633+
hdrs = ["profile_writer.h"],
634+
deps = [
635+
":cfg",
636+
":cfg_edge",
637+
":cfg_id",
638+
":cfg_node",
639+
":function_chain_info",
640+
":profile",
641+
":propeller_options_cc_proto",
642+
"@abseil-cpp//absl/algorithm:container",
643+
"@abseil-cpp//absl/container:flat_hash_map",
644+
"@abseil-cpp//absl/log",
645+
"@abseil-cpp//absl/log:check",
646+
"@abseil-cpp//absl/strings",
647+
"@abseil-cpp//absl/strings:str_format",
648+
"@llvm-project//llvm:Support",
649+
],
650+
)
651+
652+
# Library for disassembling machine instructions at a given address
# (mini_disassembler.cc / mini_disassembler.h).
cc_library(
653+
name = "mini_disassembler",
654+
srcs = ["mini_disassembler.cc"],
655+
hdrs = ["mini_disassembler.h"],
656+
deps = [
657+
"@abseil-cpp//absl/base:nullability",
658+
"@abseil-cpp//absl/log",
659+
"@abseil-cpp//absl/log:check",
660+
"@abseil-cpp//absl/memory",
661+
"@abseil-cpp//absl/status",
662+
"@abseil-cpp//absl/status:statusor",
663+
"@abseil-cpp//absl/strings",
664+
"@abseil-cpp//absl/strings:str_format",
# The two "keep" deps below are presumably pinned because
# mini_disassembler.cc calls llvm::InitializeAllAsmParsers() and
# llvm::InitializeAllDisassemblers() without including a header from these
# targets -- confirm before removing.
"@llvm-project//llvm:AllTargetsAsmParsers", # buildcleaner: keep
665+
"@llvm-project//llvm:AllTargetsDisassemblers", # buildcleaner: keep
666+
"@llvm-project//llvm:MC",
667+
"@llvm-project//llvm:MCDisassembler",
668+
"@llvm-project//llvm:Object",
669+
"@llvm-project//llvm:Support",
670+
"@llvm-project//llvm:TargetParser",
671+
],
672+
)
674+
630675
########################
631676
# Tests & Test Utils #
632677
########################
@@ -1103,3 +1148,21 @@ cc_test(
11031148
"@com_google_googletest//:gtest_main",
11041149
],
11051150
)
1151+
1152+
# Unit tests for :mini_disassembler. The tests load the
# llvm_function_samples.binary fixture at runtime (hence the `data` entry) and
# compare decoded opcodes against llvm::X86 opcode constants
# (hence :X86UtilsAndDesc).
cc_test(
1153+
name = "mini_disassembler_test",
1154+
srcs = ["mini_disassembler_test.cc"],
1155+
data = [
1156+
"//propeller/testdata:llvm_function_samples.binary",
1157+
],
1158+
deps = [
1159+
":binary_content",
1160+
":mini_disassembler",
1161+
":status_testing_macros",
1162+
"@abseil-cpp//absl/status:status_matchers",
1163+
"@abseil-cpp//absl/strings",
1164+
"@com_google_googletest//:gtest_main",
1165+
"@llvm-project//llvm:MC",
1166+
"@llvm-project//llvm:X86UtilsAndDesc",
1167+
],
1168+
)

propeller/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,13 @@ add_library(propeller_lib OBJECT
4242
file_perf_data_provider.cc
4343
frequencies_branch_aggregator.cc
4444
lbr_branch_aggregator.cc
45+
mini_disassembler.cc
4546
node_chain.cc
4647
node_chain_assembly.cc
4748
node_chain_builder.cc
4849
perf_branch_frequencies_aggregator.cc
4950
perfdata_reader.cc
51+
profile_writer.cc
5052
program_cfg.cc
5153
program_cfg_builder.cc
5254
propeller_statistics.cc

propeller/mini_disassembler.cc

+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#include "propeller/mini_disassembler.h"
2+
3+
#include <cstdint>
4+
#include <memory>
5+
#include <string>
6+
7+
#include "absl/base/nullability.h"
8+
#include "absl/log/check.h"
9+
#include "absl/log/log.h"
10+
#include "absl/memory/memory.h"
11+
#include "absl/status/status.h"
12+
#include "absl/status/statusor.h"
13+
#include "absl/strings/str_format.h"
14+
#include "absl/strings/string_view.h"
15+
#include "llvm/ADT/ArrayRef.h"
16+
#include "llvm/ADT/StringRef.h"
17+
#include "llvm/MC/MCInst.h"
18+
#include "llvm/MC/MCInstrDesc.h"
19+
#include "llvm/MC/MCInstrInfo.h"
20+
#include "llvm/MC/TargetRegistry.h"
21+
#include "llvm/Object/ObjectFile.h"
22+
#include "llvm/Support/Error.h"
23+
#include "llvm/Support/TargetSelect.h"
24+
#include "llvm/Support/raw_ostream.h"
25+
#include "llvm/TargetParser/Triple.h"
26+
27+
namespace propeller {
28+
// Creates a MiniDisassembler for the architecture of `object_file`.
//
// Calls the llvm::InitializeAll* registration functions, looks up the LLVM
// target matching the object file's architecture and builds the MC-layer
// objects (register info, asm info, subtarget info, instruction info,
// instruction analysis, context and disassembler) that the other methods use.
//
// `object_file` is not owned and must outlive the returned object.
//
// Returns:
//   - InvalidArgumentError if `object_file` is null.
//   - FailedPreconditionError if the target cannot be found or any of the
//     MC-layer objects cannot be created.
absl::StatusOr<absl::Nonnull<std::unique_ptr<MiniDisassembler>>>
MiniDisassembler::Create(const llvm::object::ObjectFile *object_file) {
  // Fail cleanly instead of dereferencing a null pointer below.
  if (object_file == nullptr) {
    return absl::InvalidArgumentError("object_file must not be null");
  }

  // The constructor is private, so the object is allocated with `new` and
  // immediately handed to a unique_ptr.
  auto disassembler =
      absl::WrapUnique<MiniDisassembler>(new MiniDisassembler(object_file));

  std::string err;
  llvm::InitializeAllTargetInfos();
  llvm::InitializeAllTargetMCs();
  llvm::InitializeAllAsmParsers();
  llvm::InitializeAllDisassemblers();

  // Only the architecture is taken from the object file; the remaining triple
  // components are left at their defaults.
  llvm::Triple triple;
  triple.setArch(llvm::Triple::ArchType(object_file->getArch()));
  const llvm::Target *target =
      llvm::TargetRegistry::lookupTarget(triple.normalize(), err);
  if (target == nullptr) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "no target for triple '%s': %s", triple.getArchName(), err));
  }

  disassembler->mri_ =
      absl::WrapUnique(target->createMCRegInfo(triple.getTriple()));
  if (disassembler->mri_ == nullptr) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCRegInfo failed for triple '%s'", triple.getArchName()));
  }

  disassembler->asm_info_ = absl::WrapUnique(target->createMCAsmInfo(
      *disassembler->mri_, triple.getTriple(), llvm::MCTargetOptions()));
  if (disassembler->asm_info_ == nullptr) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCAsmInfo failed for triple '%s'", triple.getArchName()));
  }

  disassembler->sti_ = absl::WrapUnique(target->createMCSubtargetInfo(
      triple.getTriple(), /*CPU=*/"", /*Features=*/""));
  if (disassembler->sti_ == nullptr) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCSubtargetInfo failed for triple '%s'", triple.getArchName()));
  }

  disassembler->mii_ = absl::WrapUnique(target->createMCInstrInfo());
  if (disassembler->mii_ == nullptr) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCInstrInfo failed for triple '%s'", triple.getArchName()));
  }

  disassembler->mia_ =
      absl::WrapUnique(target->createMCInstrAnalysis(disassembler->mii_.get()));
  if (disassembler->mia_ == nullptr) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "createMCInstrAnalysis failed for triple '%s'", triple.getArchName()));
  }

  // The context and the disassembler keep pointers/references to the objects
  // created above, all of which are owned by `disassembler`.
  disassembler->ctx_ = std::make_unique<llvm::MCContext>(
      triple, disassembler->asm_info_.get(), disassembler->mri_.get(),
      disassembler->sti_.get());
  disassembler->disasm_ = absl::WrapUnique(
      target->createMCDisassembler(*disassembler->sti_, *disassembler->ctx_));
  if (disassembler->disasm_ == nullptr) {
    return absl::FailedPreconditionError("createMCDisassembler failed");
  }

  return disassembler;
}
92+
93+
// Decodes the single machine instruction located at `binary_address`.
//
// Scans the object file's sections for a non-virtual text section whose
// address range contains `binary_address` and asks the LLVM disassembler to
// decode one instruction starting there.
//
// Returns FailedPreconditionError if no such section exists, if the section's
// contents cannot be read, if the address lies beyond the bytes actually
// available for the section, or if decoding fails.
absl::StatusOr<llvm::MCInst> MiniDisassembler::DisassembleOne(
    uint64_t binary_address) {
  for (const auto &section : object_file_->sections()) {
    if (!section.isText() || section.isVirtual()) {
      continue;
    }
    const uint64_t section_start = section.getAddress();
    // Compare offsets rather than `start + size` so that a section ending at
    // the top of the address space cannot wrap around.
    if (binary_address < section_start ||
        binary_address - section_start >= section.getSize()) {
      continue;
    }
    const uint64_t section_offset = binary_address - section_start;

    llvm::Expected<llvm::StringRef> content = section.getContents();
    if (!content) {
      // A failed llvm::Expected must have its error taken and handled before
      // it is destroyed; llvm::toString() consumes it and keeps the reason.
      return absl::FailedPreconditionError(
          absl::StrFormat("section has no content: %s",
                          llvm::toString(content.takeError())));
    }
    llvm::ArrayRef<uint8_t> content_bytes(
        reinterpret_cast<const uint8_t *>(content->data()), content->size());
    // Guard the slice below: ArrayRef::slice() requires the offset to be
    // within the array.
    if (section_offset >= content_bytes.size()) {
      return absl::FailedPreconditionError(absl::StrFormat(
          "binary address 0x%lx is outside the section's contents",
          binary_address));
    }

    llvm::MCInst inst;
    uint64_t size = 0;
    // Only an outright `Fail` is rejected; any other decode status is
    // accepted, as before.
    if (disasm_->getInstruction(inst, size,
                                content_bytes.slice(section_offset),
                                binary_address, llvm::nulls()) ==
        llvm::MCDisassembler::Fail) {
      return absl::FailedPreconditionError(absl::StrFormat(
          "getInstruction failed at binary address 0x%lx", binary_address));
    }
    return inst;
  }
  return absl::FailedPreconditionError(absl::StrFormat(
      "no section containing address 0x%lx found", binary_address));
}
123+
124+
// Returns whether `inst` may affect control flow, according to the
// instruction description LLVM holds for its opcode.
bool MiniDisassembler::MayAffectControlFlow(const llvm::MCInst &inst) {
  const llvm::MCInstrDesc &description = mii_->get(inst.getOpcode());
  return description.mayAffectControlFlow(inst, *mri_);
}
127+
128+
// Returns the name the target's instruction info associates with the opcode
// of `inst`.
llvm::StringRef MiniDisassembler::GetInstructionName(
    const llvm::MCInst &inst) const {
  const auto opcode = inst.getOpcode();
  return mii_->getName(opcode);
}
132+
133+
// Disassembles the instruction at `binary_address` and reports whether it may
// affect control flow. Any disassembly error is returned unchanged.
absl::StatusOr<bool> MiniDisassembler::MayAffectControlFlow(
    uint64_t binary_address) {
  absl::StatusOr<llvm::MCInst> decoded = DisassembleOne(binary_address);
  if (decoded.ok()) {
    return MayAffectControlFlow(*decoded);
  }
  return decoded.status();
}
139+
} // namespace propeller

propeller/mini_disassembler.h

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#ifndef PROPELLER_MINI_DISASSEMBLER_H_
2+
#define PROPELLER_MINI_DISASSEMBLER_H_
3+
4+
#include <cstdint>
5+
#include <memory>
6+
7+
#include "absl/base/nullability.h"
8+
#include "absl/status/statusor.h"
9+
#include "llvm/ADT/StringRef.h"
10+
#include "llvm/MC/MCAsmInfo.h"
11+
#include "llvm/MC/MCContext.h"
12+
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
13+
#include "llvm/MC/MCInst.h"
14+
#include "llvm/MC/MCInstrAnalysis.h"
15+
#include "llvm/MC/MCInstrInfo.h"
16+
#include "llvm/MC/MCRegisterInfo.h"
17+
#include "llvm/MC/MCSubtargetInfo.h"
18+
#include "llvm/Object/ObjectFile.h"
19+
20+
namespace propeller {
21+
class MiniDisassembler {
22+
public:
23+
// Creates a MiniDisassembler for `object_file`. Does not take ownership of
24+
// `object_file`, which must point to a valid object that outlives the
25+
// `MiniDisassembler`.
26+
static absl::StatusOr<absl::Nonnull<std::unique_ptr<MiniDisassembler>>>
27+
Create(const llvm::object::ObjectFile *object_file);
28+
29+
MiniDisassembler(const MiniDisassembler &) = delete;
30+
MiniDisassembler(MiniDisassembler &&) = delete;
31+
32+
MiniDisassembler &operator=(const MiniDisassembler &) = delete;
33+
MiniDisassembler &operator=(MiniDisassembler &&) = delete;
34+
35+
absl::StatusOr<llvm::MCInst> DisassembleOne(uint64_t binary_address);
36+
bool MayAffectControlFlow(const llvm::MCInst &inst);
37+
llvm::StringRef GetInstructionName(const llvm::MCInst &inst) const;
38+
absl::StatusOr<bool> MayAffectControlFlow(uint64_t binary_address);
39+
40+
private:
41+
explicit MiniDisassembler(const llvm::object::ObjectFile *object_file)
42+
: object_file_(object_file) {}
43+
44+
const llvm::object::ObjectFile *object_file_;
45+
std::unique_ptr<const llvm::MCRegisterInfo> mri_;
46+
std::unique_ptr<const llvm::MCAsmInfo> asm_info_;
47+
std::unique_ptr<const llvm::MCSubtargetInfo> sti_;
48+
std::unique_ptr<const llvm::MCInstrInfo> mii_;
49+
std::unique_ptr<llvm::MCContext> ctx_;
50+
std::unique_ptr<const llvm::MCInstrAnalysis> mia_;
51+
std::unique_ptr<const llvm::MCDisassembler> disasm_;
52+
};
53+
} // namespace propeller
54+
55+
#endif // PROPELLER_MINI_DISASSEMBLER_H_

propeller/mini_disassembler_test.cc

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#include "propeller/mini_disassembler.h"
2+
3+
#include <memory>
4+
#include <string>
5+
6+
#include "propeller/status_testing_macros.h"
7+
#include "gmock/gmock.h"
8+
#include "gtest/gtest.h"
9+
#include "absl/status/status_matchers.h"
10+
#include "absl/strings/str_cat.h"
11+
#include "llvm/MC/MCInst.h"
12+
#include "third_party/llvm/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h"
13+
#include "propeller/binary_content.h"
14+
15+
namespace propeller {
16+
namespace {
17+
using ::absl_testing::IsOk;
18+
using ::absl_testing::IsOkAndHolds;
19+
using ::testing::Not;
20+
21+
TEST(MiniDisassemblerTest, DisassembleOne) {
22+
const std::string binary =
23+
absl::StrCat(::testing::SrcDir(),
24+
"_main/propeller/testdata/"
25+
"llvm_function_samples.binary");
26+
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
27+
GetBinaryContent(binary));
28+
ASSERT_OK_AND_ASSIGN(
29+
std::unique_ptr<MiniDisassembler> md,
30+
MiniDisassembler::Create(binary_content->object_file.get()));
31+
ASSERT_OK_AND_ASSIGN(llvm::MCInst inst, md->DisassembleOne(0x4008e4));
32+
EXPECT_EQ(inst.getOpcode(), llvm::X86::RET64);
33+
}
34+
35+
TEST(MiniDisassemblerTest, DisassembleOneFailure) {
36+
const std::string binary =
37+
absl::StrCat(::testing::SrcDir(),
38+
"_main/propeller/testdata/"
39+
"llvm_function_samples.binary");
40+
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
41+
GetBinaryContent(binary));
42+
ASSERT_OK_AND_ASSIGN(
43+
std::unique_ptr<MiniDisassembler> md,
44+
MiniDisassembler::Create(binary_content->object_file.get()));
45+
EXPECT_THAT(md->DisassembleOne(0x999999999), Not(IsOk()));
46+
}
47+
48+
TEST(MiniDisassemblerTest, RetMayAffectControlFlow) {
49+
const std::string binary =
50+
absl::StrCat(::testing::SrcDir(),
51+
"_main/propeller/testdata/"
52+
"llvm_function_samples.binary");
53+
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
54+
GetBinaryContent(binary));
55+
ASSERT_OK_AND_ASSIGN(
56+
std::unique_ptr<MiniDisassembler> md,
57+
MiniDisassembler::Create(binary_content->object_file.get()));
58+
ASSERT_OK_AND_ASSIGN(llvm::MCInst ret_inst, md->DisassembleOne(0x4008e4));
59+
EXPECT_TRUE(md->MayAffectControlFlow(ret_inst));
60+
}
61+
62+
TEST(MiniDisassemblerTest, CallMayAffectControlFlow) {
63+
const std::string binary =
64+
absl::StrCat(::testing::SrcDir(),
65+
"_main/propeller/testdata/"
66+
"llvm_function_samples.binary");
67+
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
68+
GetBinaryContent(binary));
69+
ASSERT_OK_AND_ASSIGN(
70+
std::unique_ptr<MiniDisassembler> md,
71+
MiniDisassembler::Create(binary_content->object_file.get()));
72+
ASSERT_OK_AND_ASSIGN(llvm::MCInst call_inst, md->DisassembleOne(0x4008c9));
73+
EXPECT_TRUE(md->MayAffectControlFlow(call_inst));
74+
}
75+
76+
TEST(MiniDisassemblerTest, BranchMayAffectControlFlow) {
77+
const std::string binary =
78+
absl::StrCat(::testing::SrcDir(),
79+
"_main/propeller/testdata/"
80+
"llvm_function_samples.binary");
81+
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
82+
GetBinaryContent(binary));
83+
ASSERT_OK_AND_ASSIGN(
84+
std::unique_ptr<MiniDisassembler> md,
85+
MiniDisassembler::Create(binary_content->object_file.get()));
86+
EXPECT_THAT(md->MayAffectControlFlow(0x4008b6), IsOkAndHolds(true));
87+
}
88+
89+
TEST(MiniDisassemblerTest, PushMayNotAffectControlFlow) {
90+
const std::string binary =
91+
absl::StrCat(::testing::SrcDir(),
92+
"_main/propeller/testdata/"
93+
"llvm_function_samples.binary");
94+
ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> binary_content,
95+
GetBinaryContent(binary));
96+
ASSERT_OK_AND_ASSIGN(
97+
std::unique_ptr<MiniDisassembler> md,
98+
MiniDisassembler::Create(binary_content->object_file.get()));
99+
ASSERT_OK_AND_ASSIGN(llvm::MCInst push_inst, md->DisassembleOne(0x400590));
100+
EXPECT_FALSE(md->MayAffectControlFlow(push_inst));
101+
}
102+
103+
} // namespace
104+
} // namespace propeller

0 commit comments

Comments
 (0)