Skip to content

Commit

Permalink
Add helper for reading AArch64 thunks from binaries
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 703621699
  • Loading branch information
Propeller Team authored and copybara-github committed Dec 12, 2024
1 parent 42396ed commit 71f5803
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 0 deletions.
3 changes: 3 additions & 0 deletions propeller/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ cc_library(
deps = [
":addr2cu",
":status_macros",
"@abseil-cpp//absl/container:btree",
"@abseil-cpp//absl/container:flat_hash_map",
"@abseil-cpp//absl/log",
"@abseil-cpp//absl/log:check",
Expand All @@ -280,6 +281,7 @@ cc_library(
"@llvm-project//llvm:DebugInfo",
"@llvm-project//llvm:Object",
"@llvm-project//llvm:Support",
"@llvm-project//llvm:TargetParser",
],
)

Expand Down Expand Up @@ -1134,6 +1136,7 @@ cc_test(
name = "binary_content_test",
srcs = ["binary_content_test.cc"],
data = [
"//propeller/testdata:fake_thunks.bin",
"//propeller/testdata:llvm_function_samples.binary",
"//propeller/testdata:propeller_barebone_nopie_buildid",
"//propeller/testdata:propeller_barebone_pie_nobuildid_bin",
Expand Down
35 changes: 35 additions & 0 deletions propeller/binary_content.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <utility>
#include <vector>

#include "absl/container/btree_map.h"
#include "absl/container/flat_hash_map.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
Expand Down Expand Up @@ -35,6 +36,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "propeller/addr2cu.h"
#include "propeller/status_macros.h"

Expand Down Expand Up @@ -239,6 +241,31 @@ absl::Status ELFFileUtil<ELFT>::InitializeKernelModule(
LOG(INFO) << "Found kernel module description: " << desc->second;
return absl::OkStatus();
}

// Returns an AArch64 binary's thunk symbols by reading from its symbol table.
// These are returned as a map from the thunk's address to the thunk symbol.
//
// A symbol is reported as a thunk when it is an `STT_FUNC` symbol with a
// nonzero address and a nonzero size whose name starts with
// `__AArch64ADRPThunk_` or `__AArch64AbsLongThunk_`. Symbols whose address or
// name cannot be read are skipped.
absl::btree_map<uint64_t, llvm::object::ELFSymbolRef> ReadAArch64ThunkSymbols(
    const BinaryContent &binary_content) {
  absl::btree_map<uint64_t, llvm::object::ELFSymbolRef> thunk_map;
  for (llvm::object::SymbolRef sr : binary_content.object_file->symbols()) {
    llvm::object::ELFSymbolRef symbol(sr);
    if (symbol.getELFType() != llvm::ELF::STT_FUNC) continue;

    llvm::Expected<uint64_t> address = sr.getAddress();
    if (!address) {
      // An `llvm::Expected` that holds an error must have that error consumed
      // before it is destroyed; otherwise LLVM aborts in builds that enable
      // LLVM_ENABLE_ABI_BREAKING_CHECKS.
      llvm::consumeError(address.takeError());
      continue;
    }
    if (*address == 0) continue;

    llvm::Expected<llvm::StringRef> func_name = symbol.getName();
    if (!func_name) {
      // Same rule as above: drop the error explicitly before skipping.
      llvm::consumeError(func_name.takeError());
      continue;
    }
    if (!func_name->starts_with("__AArch64ADRPThunk_") &&
        !func_name->starts_with("__AArch64AbsLongThunk_")) {
      continue;
    }

    const uint64_t func_size = symbol.getSize();
    if (func_size == 0) continue;

    // `insert` does not overwrite: the first symbol seen at an address wins.
    thunk_map.insert({*address, sr});
  }
  return thunk_map;
}
} // namespace

namespace propeller {
Expand Down Expand Up @@ -279,6 +306,14 @@ ReadSymbolTable(const BinaryContent &binary_content) {
return symtab;
}

absl::btree_map<uint64_t, llvm::object::ELFSymbolRef> ReadThunkSymbols(
    const BinaryContent &binary_content) {
  // Thunk symbols are only collected for AArch64 object files; every other
  // architecture yields an empty map.
  const bool is_aarch64 =
      binary_content.object_file->getArch() == llvm::Triple::aarch64;
  if (!is_aarch64) {
    return {};
  }
  return ReadAArch64ThunkSymbols(binary_content);
}

absl::StatusOr<std::vector<llvm::object::BBAddrMap>> ReadBbAddrMap(
const BinaryContent &binary_content) {
auto *elf_object = llvm::dyn_cast<llvm::object::ELFObjectFileBase>(
Expand Down
7 changes: 7 additions & 0 deletions propeller/binary_content.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <string>
#include <vector>

#include "absl/container/btree_map.h"
#include "absl/container/flat_hash_map.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
Expand Down Expand Up @@ -116,6 +117,12 @@ absl::StatusOr<int64_t> GetSymbolAddress(
absl::flat_hash_map<uint64_t, llvm::SmallVector<llvm::object::ELFSymbolRef>>
ReadSymbolTable(const BinaryContent &binary_content);

// Returns the binary's thunk symbols by reading from its symbol table.
// These are returned as a map from the thunk's address to the thunk symbol.
// Only AArch64 binaries are currently handled; an empty map is returned for
// every other architecture.
absl::btree_map<uint64_t, llvm::object::ELFSymbolRef> ReadThunkSymbols(
const BinaryContent &binary_content);

// Returns the binary's `BBAddrMap`s by calling LLVM-side decoding function
// `ELFObjectFileBase::readBBAddrMap`. Returns error if the call fails or if the
// result is empty.
Expand Down
18 changes: 18 additions & 0 deletions propeller/binary_content_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,23 @@ TEST(GetSymbolAddressTest, SymbolNotFound) {
Not(IsOk()));
}

TEST(ThunkSymbolsTest, AArch64Thunks) {
  // fake_thunks.bin is expected to yield exactly two thunk symbols.
  const std::string thunk_binary_path =
      absl::StrCat(::testing::SrcDir(),
                   "_main/propeller/testdata/fake_thunks.bin");
  ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> content,
                       GetBinaryContent(thunk_binary_path));
  const auto thunks = ReadThunkSymbols(*content);
  EXPECT_THAT(thunks, SizeIs(2));
}

TEST(ThunkSymbolsTest, x86NoThunks) {
  // propeller_sample_1.bin must not produce any thunk symbols.
  const std::string sample_binary_path =
      absl::StrCat(::testing::SrcDir(),
                   "_main/propeller/testdata/propeller_sample_1.bin");
  ASSERT_OK_AND_ASSIGN(std::unique_ptr<BinaryContent> content,
                       GetBinaryContent(sample_binary_path));
  const auto thunks = ReadThunkSymbols(*content);
  EXPECT_THAT(thunks, SizeIs(0));
}

} // namespace
} // namespace propeller
28 changes: 28 additions & 0 deletions propeller/testdata/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ exports_files([
"bimodal_sample.x.bin",
"call_from_simple_loop.protobuf",
"clang_v0_labels.binary",
"fake_thunks.bin",
"hot_and_cold_landing_pads.protobuf",
"libro_sample.so",
"llvm_function_samples.binary",
Expand Down Expand Up @@ -362,3 +363,30 @@ genrule(
target_compatible_with = ["//third_party/bazel_platforms/cpu:aarch64"],
tools = [":sample"],
)

# Binary built from fake_thunks.c, the source used to spoof AArch64 thunk
# symbols for tests.
cc_binary(
name = "fake_thunks",
srcs = ["fake_thunks.c"],
)

# This rule can be used to manually regenerate the checked-in fake_thunks.bin.
# It is tagged "manual"; do not add it to the deps of tests, since regenerating
# the file on every test run is cumbersome.
#
# The rule writes its output as fake_thunks.bin.gen (presumably to be copied
# over fake_thunks.bin by hand -- confirm the intended workflow).
#
# To build `fake_thunks_bin`, you need to target Arm. If building with Propeller
# annotations, pass `--host_features=propeller_annotate` when building the genrule.
genrule(
name = "fake_thunks_bin",
srcs = ["fake_thunks.c"],
outs = [
"fake_thunks.bin.gen",
],
cmd = "$(CC) $(CC_FLAGS) -g -O2 -Wl,-build-id -pie -fbasic-block-sections=labels $< -o " +
"$(RULEDIR)/fake_thunks.bin.gen",
exec_compatible_with = ["//third_party/bazel_platforms/cpu:aarch64"],
tags = [
"manual",
"requires-arch:arm",
],
target_compatible_with = ["//third_party/bazel_platforms/cpu:aarch64"],
toolchains = _LLVM_PROPELLER_TESTDATA_TOOLCHAINS,
)
Binary file added propeller/testdata/fake_thunks.bin
Binary file not shown.
21 changes: 21 additions & 0 deletions propeller/testdata/fake_thunks.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/* fake_thunks.c
* Executables that contain thunks require branches > 128 MiB, which is too
* large for the testdata. We use this file to spoof thunks by creating
* functions that have thunk symbol names. However, as actual functions, they
* will have `llvm_bb_addr_map` metadata, so they cannot be treated like thunks
* for all test purposes.
*/
volatile int x = 1;

/* Fake thunk #1: returns the current value of the volatile global plus `i`. */
__attribute__((noinline)) int __AArch64ADRPThunk_test1(int i) {
  const int base = x; /* single read of the volatile global */
  return base + i;
}

/* Fake thunk #2: returns the current value of the volatile global plus
 * `i`, plus one. */
__attribute__((noinline)) int __AArch64ADRPThunk_test2(int i) {
  const int sum = x + i; /* single read of the volatile global */
  return sum + 1;
}

int main(void) {
  /* Invoke each fake thunk once; the results are intentionally discarded. */
  (void)__AArch64ADRPThunk_test1(x);
  (void)__AArch64ADRPThunk_test2(x);
  return 0;
}

0 comments on commit 71f5803

Please sign in to comment.