Skip to content

Commit e26b5e4

Browse files
Propeller Team authored and copybara-github committed
Add thunk vector member to BinaryAddressMapper. This will enable resolving
branches to thunks for more accurate CFG computation. PiperOrigin-RevId: 703621698
1 parent 1e1e9de commit e26b5e4

9 files changed

+158
-8
lines changed

Diff for: propeller/BUILD

+3
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ cc_library(
269269
deps = [
270270
":addr2cu",
271271
":status_macros",
272+
"@abseil-cpp//absl/container:btree",
272273
"@abseil-cpp//absl/container:flat_hash_map",
273274
"@abseil-cpp//absl/log",
274275
"@abseil-cpp//absl/log:check",
@@ -280,6 +281,7 @@ cc_library(
280281
"@llvm-project//llvm:DebugInfo",
281282
"@llvm-project//llvm:Object",
282283
"@llvm-project//llvm:Support",
284+
"@llvm-project//llvm:TargetParser",
283285
],
284286
)
285287

@@ -1080,6 +1082,7 @@ cc_test(
10801082
name = "binary_content_test",
10811083
srcs = ["binary_content_test.cc"],
10821084
data = [
1085+
"//propeller/testdata:fake_thunks.bin",
10831086
"//propeller/testdata:llvm_function_samples.binary",
10841087
"//propeller/testdata:propeller_barebone_nopie_buildid",
10851088
"//propeller/testdata:propeller_barebone_pie_nobuildid_bin",

Diff for: propeller/binary_address_mapper.cc

+36-7
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "absl/algorithm/container.h"
1414
#include "absl/base/attributes.h"
1515
#include "absl/base/nullability.h"
16+
#include "absl/container/btree_map.h"
1617
#include "absl/container/btree_set.h"
1718
#include "absl/container/flat_hash_map.h"
1819
#include "absl/container/flat_hash_set.h"
@@ -84,7 +85,9 @@ class BinaryAddressMapperBuilder {
8485
symtab,
8586
std::vector<llvm::object::BBAddrMap> bb_addr_map, PropellerStats &stats,
8687
absl::Nonnull<const PropellerOptions *> options
87-
ABSL_ATTRIBUTE_LIFETIME_BOUND);
88+
ABSL_ATTRIBUTE_LIFETIME_BOUND,
89+
std::optional<absl::btree_map<uint64_t, llvm::object::ELFSymbolRef>>
90+
thunk_map = std::nullopt);
8891

8992
BinaryAddressMapperBuilder(const BinaryAddressMapperBuilder &) = delete;
9093
// Copy assignment is disabled, matching the deleted copy constructor. The
// parameter must be the builder's own type; with `BinaryAddressMapper` here
// the real copy-assignment operator would still be implicitly generated.
BinaryAddressMapperBuilder &operator=(const BinaryAddressMapperBuilder &) =
    delete;
@@ -131,6 +134,9 @@ class BinaryAddressMapperBuilder {
131134
int FilterDuplicateNameFunctions(
132135
absl::btree_set<int> &selected_functions) const;
133136

137+
// Create a sorted vector of thunks in the binary from `thunk_map_`.
138+
std::optional<std::vector<ThunkInfo>> GetThunks();
139+
134140
// BB address map of functions.
135141
std::vector<llvm::object::BBAddrMap> bb_addr_map_;
136142
// Non-zero sized function symbols from elf symbol table, indexed by
@@ -144,6 +150,10 @@ class BinaryAddressMapperBuilder {
144150

145151
PropellerStats *stats_;
146152
const PropellerOptions *options_;
153+
154+
// Map of thunks by address.
155+
std::optional<absl::btree_map<uint64_t, llvm::object::ELFSymbolRef>>
156+
thunk_map_;
147157
};
148158

149159
// Helper class for extracting intra-function paths from binary-address paths.
@@ -638,6 +648,17 @@ absl::btree_set<int> BinaryAddressMapperBuilder::SelectFunctions(
638648
return selected_functions;
639649
}
640650

651+
std::optional<std::vector<ThunkInfo>> BinaryAddressMapperBuilder::GetThunks() {
652+
if (!thunk_map_.has_value()) return std::nullopt;
653+
std::vector<ThunkInfo> thunks;
654+
for (const auto &thunk_entry : *thunk_map_) {
655+
uint64_t thunk_address = thunk_entry.first;
656+
llvm::object::ELFSymbolRef thunk_symbol = thunk_entry.second;
657+
thunks.push_back({.address = thunk_address, .symbol = thunk_symbol});
658+
}
659+
return thunks;
660+
}
661+
641662
std::vector<BbHandleBranchPath> BinaryAddressMapper::ExtractIntraFunctionPaths(
642663
const BinaryAddressBranchPath &address_path) const {
643664
return IntraFunctionPathsExtractor(this).Extract(address_path);
@@ -647,12 +668,15 @@ BinaryAddressMapperBuilder::BinaryAddressMapperBuilder(
647668
absl::flat_hash_map<uint64_t, llvm::SmallVector<llvm::object::ELFSymbolRef>>
648669
symtab,
649670
std::vector<llvm::object::BBAddrMap> bb_addr_map, PropellerStats &stats,
650-
absl::Nonnull<const PropellerOptions *> options)
671+
absl::Nonnull<const PropellerOptions *> options,
672+
std::optional<absl::btree_map<uint64_t, llvm::object::ELFSymbolRef>>
673+
thunk_map)
651674
: bb_addr_map_(std::move(bb_addr_map)),
652675
symtab_(std::move(symtab)),
653676
symbol_info_map_(GetSymbolInfoMap(symtab_, bb_addr_map_)),
654677
stats_(&stats),
655-
options_(options) {
678+
options_(options),
679+
thunk_map_(std::move(thunk_map)) {
656680
stats_->bbaddrmap_stats.bbaddrmap_function_does_not_have_symtab_entry +=
657681
bb_addr_map_.size() - symbol_info_map_.size();
658682
}
@@ -661,11 +685,13 @@ BinaryAddressMapper::BinaryAddressMapper(
661685
absl::btree_set<int> selected_functions,
662686
std::vector<llvm::object::BBAddrMap> bb_addr_map,
663687
std::vector<BbHandle> bb_handles,
664-
absl::flat_hash_map<int, FunctionSymbolInfo> symbol_info_map)
688+
absl::flat_hash_map<int, FunctionSymbolInfo> symbol_info_map,
689+
std::optional<std::vector<ThunkInfo>> thunks)
665690
: selected_functions_(std::move(selected_functions)),
666691
bb_handles_(std::move(bb_handles)),
667692
bb_addr_map_(std::move(bb_addr_map)),
668-
symbol_info_map_(std::move(symbol_info_map)) {}
693+
symbol_info_map_(std::move(symbol_info_map)),
694+
thunks_(std::move(thunks)) {}
669695

670696
absl::StatusOr<std::unique_ptr<BinaryAddressMapper>> BuildBinaryAddressMapper(
671697
const PropellerOptions &options, const BinaryContent &binary_content,
@@ -676,14 +702,16 @@ absl::StatusOr<std::unique_ptr<BinaryAddressMapper>> BuildBinaryAddressMapper(
676702
ASSIGN_OR_RETURN(bb_addr_map, ReadBbAddrMap(binary_content));
677703

678704
return BinaryAddressMapperBuilder(ReadSymbolTable(binary_content),
679-
std::move(bb_addr_map), stats, &options)
705+
std::move(bb_addr_map), stats, &options,
706+
ReadThunkSymbols(binary_content))
680707
.Build(hot_addresses);
681708
}
682709

683710
std::unique_ptr<BinaryAddressMapper> BinaryAddressMapperBuilder::Build(
684711
const absl::flat_hash_set<uint64_t> *hot_addresses) && {
685712
std::optional<uint64_t> last_function_address;
686713
std::vector<BbHandle> bb_handles;
714+
std::optional<std::vector<ThunkInfo>> thunks = GetThunks();
687715
absl::btree_set<int> selected_functions = SelectFunctions(hot_addresses);
688716
DropNonSelectedFunctions(selected_functions);
689717
for (int function_index : selected_functions) {
@@ -696,9 +724,10 @@ std::unique_ptr<BinaryAddressMapper> BinaryAddressMapperBuilder::Build(
696724
bb_handles.push_back({function_index, bb_index});
697725
last_function_address = function_bb_addr_map.getFunctionAddress();
698726
}
727+
699728
return std::make_unique<BinaryAddressMapper>(
700729
std::move(selected_functions), std::move(bb_addr_map_),
701-
std::move(bb_handles), std::move(symbol_info_map_));
730+
std::move(bb_handles), std::move(symbol_info_map_), std::move(thunks));
702731
}
703732

704733
} // namespace propeller

Diff for: propeller/binary_address_mapper.h

+16-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "absl/time/time.h"
1919
#include "llvm/ADT/SmallVector.h"
2020
#include "llvm/ADT/StringRef.h"
21+
#include "llvm/Object/ELFObjectFile.h"
2122
#include "llvm/Object/ELFTypes.h"
2223
#include "propeller/bb_handle.h"
2324
#include "propeller/binary_address_branch_path.h"
@@ -103,6 +104,12 @@ struct BbHandleBranchPath {
103104
}
104105
};
105106

107+
struct ThunkInfo {
108+
uint64_t address;
109+
uint64_t target;
110+
llvm::object::ELFSymbolRef symbol;
111+
};
112+
106113
// Finds basic block entries from binary addresses.
107114
class BinaryAddressMapper {
108115
public:
@@ -120,7 +127,8 @@ class BinaryAddressMapper {
120127
absl::btree_set<int> selected_functions,
121128
std::vector<llvm::object::BBAddrMap> bb_addr_map,
122129
std::vector<BbHandle> bb_handles,
123-
absl::flat_hash_map<int, FunctionSymbolInfo> symbol_info_map);
130+
absl::flat_hash_map<int, FunctionSymbolInfo> symbol_info_map,
131+
std::optional<std::vector<ThunkInfo>> thunks = std::nullopt);
124132

125133
BinaryAddressMapper(const BinaryAddressMapper &) = delete;
126134
BinaryAddressMapper &operator=(const BinaryAddressMapper &) = delete;
@@ -141,6 +149,10 @@ class BinaryAddressMapper {
141149
return selected_functions_;
142150
}
143151

152+
const std::optional<std::vector<ThunkInfo>> &thunks() const {
153+
return thunks_;
154+
}
155+
144156
// Returns the `bb_handles_` index associated with the binary address
145157
// `address` given a branch from/to this address based on `direction`.
146158
// It returns nullopt if the no `bb_handles_` index can be mapped.
@@ -268,6 +280,9 @@ class BinaryAddressMapper {
268280
// A map from function indices to their symbol info (function names and
269281
// section name).
270282
absl::flat_hash_map<int, FunctionSymbolInfo> symbol_info_map_;
283+
284+
// A vector of thunks in the binary, ordered in increasing order of address.
285+
std::optional<std::vector<ThunkInfo>> thunks_;
271286
};
272287

273288
// Builds a `BinaryAddressMapper` for binary represented by `binary_content` and

Diff for: propeller/binary_content.cc

+34
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <utility>
99
#include <vector>
1010

11+
#include "absl/container/btree_map.h"
1112
#include "absl/container/flat_hash_map.h"
1213
#include "absl/log/check.h"
1314
#include "absl/log/log.h"
@@ -35,6 +36,7 @@
3536
#include "llvm/Support/MemoryBuffer.h"
3637
#include "llvm/Support/MemoryBufferRef.h"
3738
#include "llvm/Support/raw_ostream.h"
39+
#include "llvm/TargetParser/Triple.h"
3840
#include "propeller/addr2cu.h"
3941
#include "propeller/status_macros.h"
4042

@@ -241,6 +243,30 @@ absl::Status ELFFileUtil<ELFT>::InitializeKernelModule(
241243
}
242244
} // namespace
243245

246+
// Read AArch64 thunks from the symbol table and store them in sorted order.
247+
absl::btree_map<uint64_t, llvm::object::ELFSymbolRef> ReadAArch64ThunkSymbols(
248+
const BinaryContent &binary_content) {
249+
absl::btree_map<uint64_t, llvm::object::ELFSymbolRef> thunk_map;
250+
for (llvm::object::SymbolRef sr : binary_content.object_file->symbols()) {
251+
llvm::object::ELFSymbolRef symbol(sr);
252+
uint8_t stt = symbol.getELFType();
253+
if (stt != llvm::ELF::STT_FUNC) continue;
254+
llvm::Expected<uint64_t> address = sr.getAddress();
255+
if (!address || !*address) continue;
256+
257+
llvm::Expected<llvm::StringRef> func_name = symbol.getName();
258+
if (!func_name || (!func_name->starts_with("__AArch64ADRPThunk_") &&
259+
!func_name->starts_with("__AArch64AbsLongThunk_")))
260+
continue;
261+
262+
const uint64_t func_size = symbol.getSize();
263+
if (func_size == 0) continue;
264+
265+
thunk_map.insert({*address, sr});
266+
}
267+
return thunk_map;
268+
}
269+
244270
namespace propeller {
245271
absl::flat_hash_map<uint64_t, llvm::SmallVector<llvm::object::ELFSymbolRef>>
246272
ReadSymbolTable(const BinaryContent &binary_content) {
@@ -279,6 +305,14 @@ ReadSymbolTable(const BinaryContent &binary_content) {
279305
return symtab;
280306
}
281307

308+
std::optional<absl::btree_map<uint64_t, llvm::object::ELFSymbolRef>>
309+
ReadThunkSymbols(const BinaryContent &binary_content) {
310+
if (binary_content.object_file->getArch() == llvm::Triple::aarch64)
311+
return ::ReadAArch64ThunkSymbols(binary_content);
312+
313+
return std::nullopt;
314+
}
315+
282316
absl::StatusOr<std::vector<llvm::object::BBAddrMap>> ReadBbAddrMap(
283317
const BinaryContent &binary_content) {
284318
auto *elf_object = llvm::dyn_cast<llvm::object::ELFObjectFileBase>(

Diff for: propeller/binary_content.h

+9
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <string>
88
#include <vector>
99

10+
#include "absl/container/btree_map.h"
1011
#include "absl/container/flat_hash_map.h"
1112
#include "absl/status/status.h"
1213
#include "absl/status/statusor.h"
@@ -116,6 +117,14 @@ absl::StatusOr<int64_t> GetSymbolAddress(
116117
absl::flat_hash_map<uint64_t, llvm::SmallVector<llvm::object::ELFSymbolRef>>
117118
ReadSymbolTable(const BinaryContent &binary_content);
118119

120+
// Returns an AArch64 binary's thunk symbols by reading from its symbol table.
121+
absl::btree_map<uint64_t, llvm::object::ELFSymbolRef> ReadAArch64ThunkSymbols(
122+
const BinaryContent &binary_content);
123+
124+
// Returns the binary's thunk symbols by reading from its symbol table.
125+
std::optional<absl::btree_map<uint64_t, llvm::object::ELFSymbolRef>>
126+
ReadThunkSymbols(const BinaryContent &binary_content);
127+
119128
// Returns the binary's `BBAddrMap`s by calling LLVM-side decoding function
120129
// `ELFObjectFileBase::readBBAddrMap`. Returns error if the call fails or if the
121130
// result is empty.

Diff for: propeller/binary_content_test.cc

+18
Original file line numberDiff line numberDiff line change
@@ -69,5 +69,23 @@ TEST(GetSymbolAddressTest, SymbolNotFound) {
6969
Not(IsOk()));
7070
}
7171

72+
// Reading thunk symbols from the fake-thunks test binary must yield a map
// holding exactly two entries.
TEST(ThunkSymbolsTest, AArch64Thunks) {
  const std::string binary_path = absl::StrCat(
      ::testing::SrcDir(), "_main/propeller/testdata/fake_thunks.bin");
  ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> content,
                       GetBinaryContent(binary_path));

  const auto thunk_symbols = ReadThunkSymbols(*content);
  EXPECT_THAT(thunk_symbols, Optional(SizeIs(2)));
}
80+
81+
// For the sample binary used here (x86, per the test name) no thunk map is
// produced at all: the result is nullopt rather than an empty map.
TEST(ThunkSymbolsTest, x86NoThunks) {
  const std::string binary_path = absl::StrCat(
      ::testing::SrcDir(), "_main/propeller/testdata/propeller_sample_1.bin");
  ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> content,
                       GetBinaryContent(binary_path));

  const auto thunk_symbols = ReadThunkSymbols(*content);
  EXPECT_THAT(thunk_symbols, Eq(std::nullopt));
}
89+
7290
} // namespace
7391
} // namespace propeller

Diff for: propeller/testdata/BUILD

+23
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ exports_files([
3636
"bimodal_sample.x.bin",
3737
"call_from_simple_loop.protobuf",
3838
"clang_v0_labels.binary",
39+
"fake_thunks.bin",
3940
"hot_and_cold_landing_pads.protobuf",
4041
"libro_sample.so",
4142
"llvm_function_samples.binary",
@@ -352,3 +353,25 @@ genrule(
352353
target_compatible_with = ["//third_party/bazel_platforms/cpu:aarch64"],
353354
tools = [":sample"],
354355
)
356+
357+
# Plain build of the fake-thunks sample program.
cc_binary(
    name = "fake_thunks",
    srcs = ["fake_thunks.c"],
)

# Compiles fake_thunks.c with the propeller test-data toolchain into
# `fake_thunks.bin.gen`. Restricted to AArch64 and tagged "manual".
genrule(
    name = "fake_thunks_bin",
    srcs = ["fake_thunks.c"],
    outs = ["fake_thunks.bin.gen"],
    cmd = "$(CC) $(CC_FLAGS) -g -O2 -Wl,-build-id -pie -fbasic-block-sections=labels $< -o " +
          "$(RULEDIR)/fake_thunks.bin.gen",
    exec_compatible_with = ["//third_party/bazel_platforms/cpu:aarch64"],
    tags = [
        "manual",
        "requires-arch:arm",
    ],
    target_compatible_with = ["//third_party/bazel_platforms/cpu:aarch64"],
    toolchains = _LLVM_PROPELLER_TESTDATA_TOOLCHAINS,
)

Diff for: propeller/testdata/fake_thunks.bin

11.5 KB
Binary file not shown.

Diff for: propeller/testdata/fake_thunks.c

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/* fake_thunks.c */
/* Test input: ordinary functions whose names carry the
 * "__AArch64ADRPThunk_" prefix. */

/* Volatile so that reads of x cannot be folded away by the optimizer. */
volatile int x = 1;

/* Returns x + i. Marked noinline so the function is not inlined into main. */
__attribute__((noinline)) int __AArch64ADRPThunk_test1(int i) {
  return x + i;
}

/* Returns x + i + 1. Marked noinline for the same reason as above. */
__attribute__((noinline)) int __AArch64ADRPThunk_test2(int i) {
  return x + i + 1;
}

/* Function without the thunk name prefix; always returns 13. Declared with
 * (void) so that it has a real prototype in C. */
int sample1_func(void) { return 13; }
13+
14+
/* Calls both fake thunk functions with the current value of x, discards the
 * results, and exits with status 0. */
int main(void) {
  (void)__AArch64ADRPThunk_test1(x);
  (void)__AArch64ADRPThunk_test2(x);
  return 0;
}

0 commit comments

Comments
 (0)