From ff4b48a8a6f062955e3f49cd9dba54624173a974 Mon Sep 17 00:00:00 2001 From: Will Hawkins Date: Fri, 17 May 2024 15:24:59 -0400 Subject: [PATCH] Support external stacks and local function calls using local memory (#460) 1. Add support for invoking the interpreter and JIT'd code with an external stack. This feature is generally useful and will also make it easier to fuzz the runtime and check for correctness. 2. Add support for local functions that use local memory. Prior to this commit, a local function could be called but could not use any local memory (without overwriting memory from another function). Signed-off-by: Will Hawkins --- .../ubpf_test_external_stack_contents.input | 1 + .../data/ubpf_test_frame_pointer.input | 1 + .../ubpf_test_external_stack_contents.md | 59 ++++ .../descrs/ubpf_test_frame_pointer.md | 4 + .../srcs/ubpf_test_external_stack_contents.cc | 74 +++++ custom_tests/srcs/ubpf_test_frame_pointer.cc | 103 +++++++ external/bpf_conformance | 2 +- tests/call-save.data | 55 ++++ ubpf_plugin/ubpf_plugin.cc | 82 +++++- vm/inc/ubpf.h | 131 ++++++++- vm/test.c | 20 ++ vm/ubpf_int.h | 81 ++++-- vm/ubpf_jit.c | 42 ++- vm/ubpf_jit_arm64.c | 195 +++++++------ vm/ubpf_jit_support.c | 22 +- vm/ubpf_jit_support.h | 25 +- vm/ubpf_jit_x86_64.c | 97 +++++-- vm/ubpf_vm.c | 258 ++++++++++++------ 18 files changed, 1002 insertions(+), 250 deletions(-) create mode 100644 custom_tests/data/ubpf_test_external_stack_contents.input create mode 100644 custom_tests/data/ubpf_test_frame_pointer.input create mode 100644 custom_tests/descrs/ubpf_test_external_stack_contents.md create mode 100644 custom_tests/descrs/ubpf_test_frame_pointer.md create mode 100644 custom_tests/srcs/ubpf_test_external_stack_contents.cc create mode 100644 custom_tests/srcs/ubpf_test_frame_pointer.cc diff --git a/custom_tests/data/ubpf_test_external_stack_contents.input b/custom_tests/data/ubpf_test_external_stack_contents.input new file mode 100644 index 000000000..378996b96 --- /dev/null +++ 
b/custom_tests/data/ubpf_test_external_stack_contents.input @@ -0,0 +1 @@ +72 0a fc ff 01 00 00 00 72 0a fd ff 02 00 00 00 72 0a fe ff 03 00 00 00 72 0a ff ff 04 00 00 00 85 10 00 00 02 00 00 00 b7 00 00 00 00 00 00 00 95 00 00 00 00 00 00 00 72 0a fc ff 05 00 00 00 72 0a fd ff 06 00 00 00 72 0a fe ff 07 00 00 00 72 0a ff ff 08 00 00 00 95 00 00 00 00 00 00 00 diff --git a/custom_tests/data/ubpf_test_frame_pointer.input b/custom_tests/data/ubpf_test_frame_pointer.input new file mode 100644 index 000000000..222f3f580 --- /dev/null +++ b/custom_tests/data/ubpf_test_frame_pointer.input @@ -0,0 +1 @@ +b7 06 00 00 0a 00 00 00 b7 07 00 00 0a 00 00 00 b7 08 00 00 0a 00 00 00 b7 09 00 00 0a 00 00 00 b7 01 00 00 05 00 00 00 7b 1a f8 ff 00 00 00 00 85 10 00 00 02 00 00 00 79 a0 f8 ff 00 00 00 00 95 00 00 00 00 00 00 00 b7 01 00 00 37 00 00 00 7b 1a f8 ff 00 00 00 00 95 00 00 00 00 00 00 00 diff --git a/custom_tests/descrs/ubpf_test_external_stack_contents.md b/custom_tests/descrs/ubpf_test_external_stack_contents.md new file mode 100644 index 000000000..5ebecc4c6 --- /dev/null +++ b/custom_tests/descrs/ubpf_test_external_stack_contents.md @@ -0,0 +1,59 @@ +## Test Description + +This custom test guarantees that the eBPF program's manipulation of its stack has the intended effect. The eBPF program is JIT'd under the assumption that each of the functions require 16 bytes of space. The test guarantees that by returning `16` from `stack_usage_calculator` which is registered as the callback that will determine the stack usage of a local function. 
+ +### eBPF Program Source + +``` +stb [%r10-4], 0x01 +stb [%r10-3], 0x02 +stb [%r10-2], 0x03 +stb [%r10-1], 0x04 +call local inner +mov %r0, 0 +exit +inner: +stb [%r10-4], 0x05 +stb [%r10-3], 0x06 +stb [%r10-2], 0x07 +stb [%r10-1], 0x08 +exit +``` + +### Expected Behavior + +Given the size of the stack usage for each function (see above), the contents of the memory at the end of the program will be: + +``` +0x00: 0x00 +0x01: 0x00 +0x02: 0x00 +0x03: 0x00 +0x04: 0x00 +0x05: 0x00 +0x06: 0x00 +0x07: 0x00 +0x08: 0x00 +0x09: 0x00 +0x0a: 0x00 +0x0b: 0x00 +0x0c: 0x05 +0x0d: 0x06 +0x0e: 0x07 +0x0f: 0x08 +0x10: 0x00 +0x11: 0x00 +0x12: 0x00 +0x13: 0x00 +0x14: 0x00 +0x15: 0x00 +0x16: 0x00 +0x17: 0x00 +0x18: 0x00 +0x19: 0x00 +0x1a: 0x00 +0x1b: 0x00 +0x1c: 0x01 +0x1d: 0x02 +0x1e: 0x03 +``` diff --git a/custom_tests/descrs/ubpf_test_frame_pointer.md b/custom_tests/descrs/ubpf_test_frame_pointer.md new file mode 100644 index 000000000..e0a872ad3 --- /dev/null +++ b/custom_tests/descrs/ubpf_test_frame_pointer.md @@ -0,0 +1,4 @@ +## Test Description + +This custom test program tests that the registered stack-usage calculator is used to size +local-function stack frames so that a local function call does not corrupt its caller's stack. 
diff --git a/custom_tests/srcs/ubpf_test_external_stack_contents.cc b/custom_tests/srcs/ubpf_test_external_stack_contents.cc new file mode 100644 index 000000000..23a930f60 --- /dev/null +++ b/custom_tests/srcs/ubpf_test_external_stack_contents.cc @@ -0,0 +1,74 @@ +// Copyright (c) Will Hawkins +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include +#include +#include + +extern "C" +{ +#include "ubpf.h" +} + +#include "ubpf_custom_test_support.h" + +int +stack_usage_calculator(const struct ubpf_vm* vm, uint16_t pc, void* cookie) +{ + UNREFERENCED_PARAMETER(vm); + UNREFERENCED_PARAMETER(pc); + UNREFERENCED_PARAMETER(cookie); + return 16; +} + +int +main(int argc, char** argv) +{ + std::vector args(argv, argv + argc); + std::string program_string{}; + ubpf_jit_fn jit_fn; + + std::getline(std::cin, program_string); + + const size_t stack_size{32}; + uint8_t expected_result[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, + }; + + bool success = true; + + std::unique_ptr vm(ubpf_create(), ubpf_destroy); + std::string error{}; + if (!ubpf_setup_custom_test( + vm, + program_string, + [](ubpf_vm_up& vm, std::string& error) { + if (ubpf_register_stack_usage_calculator(vm.get(), stack_usage_calculator, nullptr) < 0) { + error = "Failed to register stack usage calculator."; + return false; + } + return true; + }, + jit_fn, + error)) { + std::cerr << "Problem setting up custom test: " << error << std::endl; + return 1; + } + + char* ex_jit_compile_error = nullptr; + auto jit_ex_fn = ubpf_compile_ex(vm.get(), &ex_jit_compile_error, ExtendedJitMode); + uint8_t external_stack[stack_size] = { + 0, + }; + jit_ex_fn(nullptr, 0, external_stack, stack_size); + + for (size_t i = 0; i < stack_size; i++) { + if (external_stack[i] != expected_result[i]) { + success = false; + } + } + return !success; +} diff --git a/custom_tests/srcs/ubpf_test_frame_pointer.cc b/custom_tests/srcs/ubpf_test_frame_pointer.cc 
new file mode 100644 index 000000000..7c19c0412 --- /dev/null +++ b/custom_tests/srcs/ubpf_test_frame_pointer.cc @@ -0,0 +1,103 @@ +// Copyright (c) Will Hawkins +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include +#include +#include + +extern "C" +{ +#include "ubpf.h" +} + +#include "ubpf_custom_test_support.h" + +int +stack_usage_calculator(const struct ubpf_vm* vm, uint16_t pc, void* cookie) +{ + UNREFERENCED_PARAMETER(vm); + UNREFERENCED_PARAMETER(pc); + UNREFERENCED_PARAMETER(cookie); + return 16; +} + +int +overwrite_stack_usage_calculator(const struct ubpf_vm* vm, uint16_t pc, void* cookie) +{ + UNREFERENCED_PARAMETER(vm); + UNREFERENCED_PARAMETER(pc); + UNREFERENCED_PARAMETER(cookie); + return 0; +} + +int +main(int argc, char** argv) +{ + std::vector args(argv, argv + argc); + std::string program_string{}; + ubpf_jit_fn jit_fn; + + std::getline(std::cin, program_string); + + uint64_t no_overwrite_interp_result = 0; + uint64_t no_overwrite_jit_result = 0; + uint64_t overwrite_interp_result = 0; + uint64_t overwrite_jit_result = 0; + + { + + std::unique_ptr vm(ubpf_create(), ubpf_destroy); + std::string error{}; + if (!ubpf_setup_custom_test( + vm, + program_string, + [](ubpf_vm_up& vm, std::string& error) { + if (ubpf_register_stack_usage_calculator(vm.get(), stack_usage_calculator, nullptr) < 0) { + error = "Failed to register stack usage calculator."; + return false; + } + return true; + }, + jit_fn, + error)) { + std::cerr << "Problem setting up custom test: " << error << std::endl; + return 1; + } + + no_overwrite_jit_result = jit_fn(nullptr, 0); + [[maybe_unused]] auto exec_result = ubpf_exec(vm.get(), NULL, 0, &no_overwrite_interp_result); + } + + { + + std::unique_ptr vm(ubpf_create(), ubpf_destroy); + std::string error{}; + if (!ubpf_setup_custom_test( + vm, + program_string, + [](ubpf_vm_up& vm, std::string& error) { + if (ubpf_register_stack_usage_calculator(vm.get(), overwrite_stack_usage_calculator, nullptr) < 0) { 
+ error = "Failed to register stack usage calculator."; + return false; + } + return true; + }, + jit_fn, + error)) { + std::cerr << "Problem setting up custom test: " << error << std::endl; + return 1; + } + + overwrite_jit_result = jit_fn(nullptr, 0); + + [[maybe_unused]] auto exec_result = ubpf_exec(vm.get(), NULL, 0, &overwrite_interp_result); + } + // The run whose calculator returns 16 must yield 0x5 from both the interpreter and the JIT; + // the run whose calculator returns 0 must yield 0x37 from both. + return !( + no_overwrite_interp_result == no_overwrite_jit_result && no_overwrite_interp_result == 0x5 && + overwrite_interp_result == overwrite_jit_result && overwrite_interp_result == 0x37); +} diff --git a/external/bpf_conformance b/external/bpf_conformance index 3566334b7..c4ce8f5a4 160000 --- a/external/bpf_conformance +++ b/external/bpf_conformance @@ -1 +1 @@ -Subproject commit 3566334b7dd99c305eb45f161a40b94441451f4e +Subproject commit c4ce8f5a4afc628637e4332006732d5c91f86dbe diff --git a/tests/call-save.data index bbf5300a8..423947ff9 100644 --- a/tests/call-save.data +++ b/tests/call-save.data @@ -3,13 +3,68 @@ mov %r6, 0x0001 mov %r7, 0x0020 mov %r8, 0x0300 mov %r9, 0x4000 + +# r1 should contain pointer to program memory. +# Don't screw that up because helper function 1 (memfrob) +# needs it. +mov %r2, 0x0001 +mov %r3, 0x0001 +mov %r4, 0x0001 +mov %r5, 0x0001 +call 1 +mov %r0, 0 +or %r0, %r6 +or %r0, %r7 +or %r0, %r8 +or %r0, %r9 +jeq %r0, 0x4321, +1 +exit + +# Call helper function 0 -- the memory pointer is +# no longer needed for any other helper functions, so +# we don't have to worry about keeping it safe. 
+mov %r1, 0x0001 +mov %r2, 0x0001 +mov %r3, 0x0001 +mov %r4, 0x0001 +mov %r5, 0x0001 +call 0 +mov %r0, 0 +or %r0, %r6 +or %r0, %r7 +or %r0, %r8 +or %r0, %r9 +jeq %r0, 0x4321, +1 +exit + +mov %r1, 0x0001 +mov %r2, 0x0001 +mov %r3, 0x0001 +mov %r4, 0x0001 +mov %r5, 0x0001 call 2 mov %r0, 0 or %r0, %r6 or %r0, %r7 or %r0, %r8 or %r0, %r9 +jeq %r0, 0x4321, +1 +exit + +mov %r1, 0x0001 +mov %r2, 0x0001 +mov %r3, 0x0001 +mov %r4, 0x0001 +mov %r5, 0x0001 +call 3 +mov %r0, 0 +or %r0, %r6 +or %r0, %r7 +or %r0, %r8 +or %r0, %r9 exit +-- mem +01 02 03 04 05 06 07 08 -- result 0x4321 -- no register offset diff --git a/ubpf_plugin/ubpf_plugin.cc b/ubpf_plugin/ubpf_plugin.cc index ec48ff175..6246c7d9f 100644 --- a/ubpf_plugin/ubpf_plugin.cc +++ b/ubpf_plugin/ubpf_plugin.cc @@ -73,6 +73,26 @@ bytes_to_ebpf_inst(std::vector bytes) return instructions; } +/** + * @brief The handler to determine the stack usage of local functions. + * + * @param[in] vm Pointer to the VM of which the local function at pc is a part. + * @param[in] pc The instruction address of the local function. + * @param[in] cookie A pointer to the context cookie given when this callback + * was registered. + * @return The amount of stack used by the local function starting at pc. + */ +int +stack_usage_calculator(const struct ubpf_vm* vm, uint16_t pc, void* cookie) +{ + UNREFERENCED_PARAMETER(pc); + UNREFERENCED_PARAMETER(cookie); + UNREFERENCED_PARAMETER(vm); + // We will default to a conservative 64 bytes of stack usage for each local function. + // That should be enough for all the conformance tests. + return 64; +} + /** * @brief This program reads BPF instructions from stdin and memory contents from * the first agument. 
It then executes the BPF program and prints the @@ -138,6 +158,8 @@ int main(int argc, char **argv) ubpf_register_external_dispatcher(vm.get(), test_helpers_dispatcher, test_helpers_validater); + ubpf_register_stack_usage_calculator(vm.get(), stack_usage_calculator, nullptr); + if (ubpf_set_unwind_function_index(vm.get(), 5) != 0) { std::cerr << "Failed to set unwind function index" << std::endl; @@ -212,12 +234,33 @@ int main(int argc, char **argv) } copy_result = fn(usable_program_memory_pointer, usable_program_memory.size()); + ubpf_jit_ex_fn fn_ex = ubpf_compile_ex(vm.get(), &error, ExtendedJitMode); + if (fn_ex == nullptr) { + std::cerr << "Failed to compile program (extended): " << error << std::endl; + free(error); + return 1; + } + + uint64_t index_helper_result_external_stack; + usable_program_memory = memory; + usable_program_memory_pointer = nullptr; + if (usable_program_memory.size() != 0) { + usable_program_memory_pointer = usable_program_memory.data(); + } + + uint8_t* external_stack = (uint8_t*)calloc(512, 1); + index_helper_result_external_stack = + fn_ex(usable_program_memory_pointer, usable_program_memory.size(), external_stack, 512); + free(external_stack); + // ... and make sure the results are the same. - if (external_dispatcher_result != index_helper_result || index_helper_result != copy_result) { + if (external_dispatcher_result != index_helper_result || index_helper_result != copy_result || + external_dispatcher_result != index_helper_result_external_stack) { std::cerr << "Execution of the JIT'd code (with external and indexed helpers) and a copy of " - "the JIT'd code gave different results: 0x" << std::hex << external_dispatcher_result - << " vs 0x" << std::hex << index_helper_result - << " vs 0x" << std::hex << copy_result << "." 
<< std::endl; + "the JIT'd code gave different results: 0x" + << std::hex << external_dispatcher_result << " vs 0x" << std::hex << index_helper_result + << " vs 0x" << std::hex << copy_result << " vs 0x" << std::hex + << index_helper_result_external_stack << "." << std::endl; return 1; } } @@ -246,7 +289,7 @@ int main(int argc, char **argv) } } - // ... but first reset program memory. + // ... but first reset program memory ... usable_program_memory = memory; usable_program_memory_pointer = nullptr; if (usable_program_memory.size() != 0) { @@ -260,6 +303,35 @@ int main(int argc, char **argv) return 1; } + // ... and, for the cherry on the sundae, execute the program by specifying a stack ... + uint64_t* external_stack = NULL; + + external_stack = (uint64_t*)calloc(512, 1); + if (!external_stack) { + return -1; + } + + // ... but first, reset that pesky memory again ... + usable_program_memory = memory; + usable_program_memory_pointer = nullptr; + if (usable_program_memory.size() != 0) { + usable_program_memory_pointer = usable_program_memory.data(); + } + + uint64_t external_memory_index_helper_result; + if (ubpf_exec_ex( + vm.get(), + usable_program_memory_pointer, + usable_program_memory.size(), + &external_memory_index_helper_result, + (uint8_t*)external_stack, + 512) != 0) { + std::cerr << "Failed to execute program" << std::endl; + return 1; + } + + free(external_stack); + // ... and make sure the results are the same. if (external_dispatcher_result != index_helper_result) { std::cerr << "Execution of the interpreted code with external and indexed helpers gave difference results: 0x" diff --git a/vm/inc/ubpf.h b/vm/inc/ubpf.h index 6ee77cbe0..c45552258 100644 --- a/vm/inc/ubpf.h +++ b/vm/inc/ubpf.h @@ -40,12 +40,14 @@ extern "C" #endif /** - * @brief Default stack size for the VM. Must be divisible by 16. + * @brief Default stack size for the eBPF program. Must be divisible by 16. 
*/ -#if !defined(UBPF_STACK_SIZE) -#define UBPF_STACK_SIZE 512 +#if !defined(UBPF_EBPF_STACK_SIZE) +#define UBPF_EBPF_STACK_SIZE 512 #endif +#define UBPF_EBPF_NONVOLATILE_SIZE (sizeof(uint64_t) * 5) + /** * @brief Default maximum number of nested calls in the VM. */ @@ -63,6 +65,35 @@ extern "C" */ typedef uint64_t (*ubpf_jit_fn)(void* mem, size_t mem_len); + /** + * @brief Opaque type for a uBPF JIT compiled function with + * external stack. + */ + typedef uint64_t (*ubpf_jit_ex_fn)(void* mem, size_t mem_len, uint8_t* stack, size_t stack_len); + + /** + * @brief Enum to describe JIT mode. + * + * ExtendedJitMode specifies that an invocation of that code have 4 parameters: + * 1. A pointer to the program's memory space. + * 2. The size of the program's memory space. + * 3. A pointer to memory to be used by the program as a stack during execution. + * 4. The size of the provided stack space. + * See ubpf_jit_ex_fn for more information. + * + * BasicJitMode specifies that an invocation of that code have 2 parameters: + * 1. A pointer to the program's memory space. + * 2. The size of the program's memory space. + * The function generated by the JITer executing in basic mode automatically + * allocates a stack for the program's execution. + * See ubpf_jit_fn for more information. + */ + enum JitMode + { + ExtendedJitMode, + BasicJitMode + }; + /** * @brief Create a new uBPF VM. * @@ -160,9 +191,40 @@ extern "C" */ int ubpf_register_external_dispatcher( - struct ubpf_vm* vm, - external_function_dispatcher_t dispatcher, - external_function_validate_t validater); + struct ubpf_vm* vm, external_function_dispatcher_t dispatcher, external_function_validate_t validater); + + /** + * @brief The type of a stack usage calculator callback function. + * + * See ubpf_register_stack_usage_calculator for additional information. 
+ */ + typedef int (*stack_usage_calculator_t)(const struct ubpf_vm* vm, uint16_t pc, void* cookie); + + /** + * @brief Register a function that will be called during eBPF program validation + * to determine stack usage for a local function. + * + * In eBPF, the frame pointer is a read-only register. Therefore, the eBPF interpreter + * or the eBPF JITer need to know the stack usage for each local function so that the + * frame pointer can be adjusted properly on behalf of the calling function. The callback + * registered here has access to a cookie for context (specified in the call to this function), + * the PC (in the eBPF program) of the first instruction of a local function and the `ubpf_vm`. + * + * The callback's job is to calculate the amount of stack space used by the local function that + * starts at the given PC. + * + * If the callback returns 0 or there is no callback registered, the eBPF interpreter/JITer + * assume that the local function uses the maximum stack available according to the spec (512 bytes). + * + * @param[in] vm The VM to register the callback with. + * @param[in] calculator The callback that will be invoked to determine the amount of stack + * usage for the local function that starts at a given pc. + * @param[in] cookie A pointer to context that is passed to the callback each time it is invoked. + * @retval 0 Success. + * @retval -1 Failure. + */ + int + ubpf_register_stack_usage_calculator(struct ubpf_vm* vm, stack_usage_calculator_t calculator, void* cookie); /** * @brief Load code into a VM. @@ -268,6 +330,15 @@ extern "C" int ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_return_value); + int + ubpf_exec_ex( + const struct ubpf_vm* vm, + void* mem, + size_t mem_len, + uint64_t* bpf_return_value, + uint8_t* stack, + size_t stack_len); + /** * @brief Compile a BPF program in the VM to native code. * * A program must be loaded into the VM and all external functions (or * the external helper dispatcher) must be registered before calling this * function. 
* + * The JITer executes in basic mode when invoked through this function. + * + * @param[in] vm The VM to compile the program in. + * @param[out] errmsg The error message, if any. This should be freed by the caller. + * @return ubpf_jit_fn A pointer to the compiled program, or NULL on failure. @@ -282,19 +355,41 @@ ubpf_jit_fn ubpf_compile(struct ubpf_vm* vm, char** errmsg); + /** + * @brief Compile a BPF program in the VM to native code. + * + * A program must be loaded into the VM and all external functions (or + * the external helper dispatcher) must be registered before calling this + * function. + * + * The JITer executes in the prescribed mode when invoked through this function. + * If jit_mode is basic, the caller will have to cast the function pointer to the + * appropriate type (ubpf_jit_fn). + * + * @param[in] vm The VM to compile the program in. + * @param[out] errmsg The error message, if any. This should be freed by the caller. + * @param[in] jit_mode The mode in which to execute the JITer -- basic or extended. + * @return ubpf_jit_ex_fn A pointer to the compiled program, or NULL on failure. + */ + ubpf_jit_ex_fn + ubpf_compile_ex(struct ubpf_vm* vm, char** errmsg, enum JitMode jit_mode); /** * @brief Copy the JIT'd program code to the given buffer. * * A program must have been loaded into the VM and already JIT'd before * calling this function. * + * Note: Caller must know the mode in which the JITer was executed and may + * need to cast the result to the appropriate type (e.g., ubpf_jit_ex_fn). + * * @param[in] vm The VM of the already JIT'd program. * @param[out] errmsg The error message, if any. This should be freed by the caller. * @return ubpf_jit_fn A pointer to the compiled program (the same as buffer), or * NULL on failure. 
*/ ubpf_jit_fn - ubpf_copy_jit(struct ubpf_vm* vm, void *buffer, size_t size, char** errmsg); + ubpf_copy_jit(struct ubpf_vm* vm, void* buffer, size_t size, char** errmsg); /** * @brief Translate the eBPF byte code to machine code. @@ -302,6 +397,8 @@ extern "C" * A program must be loaded into the VM and all external functions must be * registered before calling this function. * + * The JITer executes in basic mode when invoked through this function. + * * @param[in] vm The VM to translate the program in. * @param[out] buffer The buffer to store the translated code in. * @param[in] size The size of the buffer. @@ -312,6 +409,25 @@ extern "C" int ubpf_translate(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg); + /** + * @brief Translate the eBPF byte code to machine code. + * + * A program must be loaded into the VM and all external functions must be + * registered before calling this function. + * + * The JITer executes in the prescribed mode when invoked through this function. + * + * @param[in] vm The VM to translate the program in. + * @param[out] buffer The buffer to store the translated code in. + * @param[in] size The size of the buffer. + * @param[out] errmsg The error message, if any. This should be freed by the caller. + * @param[in] jit_mode The mode in which to execute the JITer -- basic or extended. + * @retval 0 Success. + * @retval -1 Failure. + */ + int + ubpf_translate_ex(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg, enum JitMode jit_mode); + /** * @brief Instruct the uBPF runtime to apply unwind-on-success semantics to a helper function. 
* If the function returns 0, the uBPF runtime will end execution of @@ -428,7 +544,6 @@ extern "C" int ubpf_set_instruction_limit(struct ubpf_vm* vm, uint32_t limit, uint32_t* previous_limit); - #ifdef __cplusplus } #endif diff --git a/vm/test.c b/vm/test.c index 80f4d1000..fa50b413d 100644 --- a/vm/test.c +++ b/vm/test.c @@ -176,6 +176,25 @@ map_relocation_bounds_check_function(void* user_context, uint64_t addr, uint64_t } return false; } +/** + * @brief The handler to determine the stack usage of local functions. + * + * @param[in] vm Pointer to the VM of which the local function at pc is a part. + * @param[in] pc The instruction address of the local function. + * @param[in] cookie A pointer to the context cookie given when this callback + * was registered. + * @return The amount of stack used by the local function starting at pc. + */ +int +stack_usage_calculator(const struct ubpf_vm* vm, uint16_t pc, void* cookie) +{ + (void)(pc); + (void)(cookie); + (void)(vm); + // This is sized large enough that the rel_64_32.bpf.c program has enough space + // for each local function! + return 32; +} int main(int argc, char** argv) @@ -283,6 +302,7 @@ main(int argc, char** argv) register_functions(vm); + ubpf_register_stack_usage_calculator(vm, stack_usage_calculator, NULL); /* * The ELF magic corresponds to an RSH instruction with an offset, * which is invalid. 
diff --git a/vm/ubpf_int.h b/vm/ubpf_int.h index 1389886b3..5f6bc532d 100644 --- a/vm/ubpf_int.h +++ b/vm/ubpf_int.h @@ -21,6 +21,7 @@ #ifndef UBPF_INT_H #define UBPF_INT_H +#include #include #include "ebpf.h" @@ -29,16 +30,25 @@ struct ebpf_inst; typedef uint64_t (*ext_func)(uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4); -typedef enum { +typedef enum +{ UBPF_JIT_COMPILE_SUCCESS, UBPF_JIT_COMPILE_FAILURE, } upbf_jit_result_t; -struct ubpf_jit_result { +struct ubpf_jit_result +{ uint32_t external_dispatcher_offset; uint32_t external_helper_offset; upbf_jit_result_t compile_result; - char *errmsg; + enum JitMode jit_mode; + char* errmsg; +}; + +struct ubpf_stack_usage +{ + bool stack_usage_calculated; + uint16_t stack_usage; }; #define MAX_EXT_FUNCS 64 @@ -47,7 +57,7 @@ struct ubpf_vm { struct ebpf_inst* insts; uint16_t num_insts; - ubpf_jit_fn jitted; + ubpf_jit_ex_fn jitted; size_t jitted_size; size_t jitter_buffer_size; struct ubpf_jit_result jitted_result; @@ -56,14 +66,24 @@ struct ubpf_vm bool* int_funcs; const char** ext_func_names; + struct ubpf_stack_usage* local_func_stack_usage; + void* stack_usage_calculator_cookie; + stack_usage_calculator_t stack_usage_calculator; + external_function_dispatcher_t dispatcher; external_function_validate_t dispatcher_validate; bool bounds_check_enabled; int (*error_printf)(FILE* stream, const char* format, ...); - struct ubpf_jit_result (*jit_translate)(struct ubpf_vm* vm, uint8_t* buffer, size_t* size); - bool (*jit_update_dispatcher)(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); - bool (*jit_update_helper)(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); + struct ubpf_jit_result (*jit_translate)(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode); + bool (*jit_update_dispatcher)( + struct ubpf_vm* vm, + external_function_dispatcher_t 
new_dispatcher, + uint8_t* buffer, + size_t size, + uint32_t offset); + bool (*jit_update_helper)( + struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); int unwind_stack_extension_index; uint64_t pointer_secret; ubpf_data_relocation data_relocation_function; @@ -78,29 +98,42 @@ struct ubpf_vm struct ubpf_stack_frame { + uint16_t stack_usage; uint16_t return_address; - uint64_t saved_registers[4]; + uint64_t saved_registers[5]; }; /* The various JIT targets. */ // arm64 struct ubpf_jit_result -ubpf_translate_arm64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size); -bool ubpf_jit_update_dispatcher_arm64(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); -bool ubpf_jit_update_helper_arm64(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); - -//x86_64 +ubpf_translate_arm64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode); +bool +ubpf_jit_update_dispatcher_arm64( + struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); +bool +ubpf_jit_update_helper_arm64( + struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); + +// x86_64 struct ubpf_jit_result -ubpf_translate_x86_64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size); -bool ubpf_jit_update_dispatcher_x86_64(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); -bool ubpf_jit_update_helper_x86_64(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); - -//uhm, hello? 
+ubpf_translate_x86_64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode); +bool +ubpf_jit_update_dispatcher_x86_64( + struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); +bool +ubpf_jit_update_helper_x86_64( + struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); + +// uhm, hello? struct ubpf_jit_result -ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size); -bool ubpf_jit_update_dispatcher_null(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); -bool ubpf_jit_update_helper_null(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); +ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode); +bool +ubpf_jit_update_dispatcher_null( + struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset); +bool +ubpf_jit_update_helper_null( + struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset); char* ubpf_error(const char* fmt, ...); @@ -130,4 +163,10 @@ ubpf_fetch_instruction(const struct ubpf_vm* vm, uint16_t pc); void ubpf_store_instruction(const struct ubpf_vm* vm, uint16_t pc, struct ebpf_inst inst); +uint16_t +ubpf_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc); + +bool +ubpf_calculate_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc, char** errmsg); + #endif diff --git a/vm/ubpf_jit.c b/vm/ubpf_jit.c index cc534ade6..0b9f42396 100644 --- a/vm/ubpf_jit.c +++ b/vm/ubpf_jit.c @@ -19,6 +19,7 @@ * limitations under the License. 
*/ +#include "ubpf.h" #define _GNU_SOURCE #include #include @@ -29,11 +30,10 @@ #include #include "ubpf_int.h" - int -ubpf_translate(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg) +ubpf_translate_ex(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg, enum JitMode jit_mode) { - struct ubpf_jit_result jit_result = vm->jit_translate(vm, buffer, size); + struct ubpf_jit_result jit_result = vm->jit_translate(vm, buffer, size, jit_mode); vm->jitted_result = jit_result; if (jit_result.errmsg) { *errmsg = jit_result.errmsg; @@ -41,8 +41,14 @@ ubpf_translate(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg) return jit_result.compile_result == UBPF_JIT_COMPILE_SUCCESS ? 0 : -1; } +int +ubpf_translate(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg) +{ + return ubpf_translate_ex(vm, buffer, size, errmsg, BasicJitMode); +} + struct ubpf_jit_result -ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size) +ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode) { struct ubpf_jit_result compile_result; compile_result.compile_result = UBPF_JIT_COMPILE_FAILURE; @@ -52,11 +58,14 @@ ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size) UNUSED_PARAMETER(vm); UNUSED_PARAMETER(buffer); UNUSED_PARAMETER(size); + UNUSED_PARAMETER(jit_mode); compile_result.errmsg = ubpf_error("Code can not be JITed on this target."); return compile_result; } -bool ubpf_jit_update_dispatcher_null(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset) +bool +ubpf_jit_update_dispatcher_null( + struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset) { UNUSED_PARAMETER(vm); UNUSED_PARAMETER(new_dispatcher); @@ -66,7 +75,9 @@ bool ubpf_jit_update_dispatcher_null(struct ubpf_vm* vm, external_function_dispa return false; } -bool ubpf_jit_update_helper_null(struct 
ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset) +bool +ubpf_jit_update_helper_null( + struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset) { UNUSED_PARAMETER(vm); UNUSED_PARAMETER(new_helper); @@ -86,15 +97,28 @@ ubpf_set_jit_code_size(struct ubpf_vm* vm, size_t code_size) ubpf_jit_fn ubpf_compile(struct ubpf_vm* vm, char** errmsg) +{ + return (ubpf_jit_fn)ubpf_compile_ex(vm, errmsg, BasicJitMode); +} + +ubpf_jit_ex_fn +ubpf_compile_ex(struct ubpf_vm* vm, char** errmsg, enum JitMode mode) { void* jitted = NULL; uint8_t* buffer = NULL; size_t jitted_size; - if (vm->jitted) { + if (vm->jitted && vm->jitted_result.compile_result == UBPF_JIT_COMPILE_SUCCESS && + vm->jitted_result.jit_mode == mode) { return vm->jitted; } + if (vm->jitted) { + munmap(vm->jitted, vm->jitted_size); + vm->jitted = NULL; + vm->jitted_size = 0; + } + *errmsg = NULL; if (!vm->insts) { @@ -109,7 +133,7 @@ ubpf_compile(struct ubpf_vm* vm, char** errmsg) goto out; } - if (ubpf_translate(vm, buffer, &jitted_size, errmsg) < 0) { + if (ubpf_translate_ex(vm, buffer, &jitted_size, errmsg, mode) < 0) { goto out; } @@ -138,7 +162,7 @@ ubpf_compile(struct ubpf_vm* vm, char** errmsg) } ubpf_jit_fn -ubpf_copy_jit(struct ubpf_vm *vm, void *buffer, size_t size, char **errmsg) +ubpf_copy_jit(struct ubpf_vm* vm, void* buffer, size_t size, char** errmsg) { // If compilation was not successfull or it has not even been attempted, // we cannot copy. 
diff --git a/vm/ubpf_jit_arm64.c b/vm/ubpf_jit_arm64.c index c67104069..9f9361a4f 100644 --- a/vm/ubpf_jit_arm64.c +++ b/vm/ubpf_jit_arm64.c @@ -24,7 +24,6 @@ #define _GNU_SOURCE #include -#include #include #include #include @@ -245,8 +244,7 @@ emit_loadstore_register( } static void -emit_loadstore_literal( - struct jit_state* state, enum LoadStoreOpcode op, enum Registers rt, uint32_t target) +emit_loadstore_literal(struct jit_state* state, enum LoadStoreOpcode op, enum Registers rt, uint32_t target) { note_load(state, target); const uint32_t reg_op_base = 0x08000000U; @@ -254,7 +252,7 @@ emit_loadstore_literal( } static void -emit_adr(struct jit_state *state, uint32_t offset, enum Registers rd) +emit_adr(struct jit_state* state, uint32_t offset, enum Registers rd) { note_lea(state, offset); uint32_t instr = 0x10000000 | rd; @@ -502,8 +500,9 @@ emit_movewide_immediate(struct jit_state* state, bool sixty_four, enum Registers /* Generate the function prologue. * * We set the stack to look like: - * SP on entry * ubpf_stack_size bytes of UBPF stack + * SP on entry + * SP on entry * Callee saved registers * Frame <- SP. * Precondition: The runtime stack pointer is 16-byte aligned. @@ -512,24 +511,27 @@ emit_movewide_immediate(struct jit_state* state, bool sixty_four, enum Registers static void emit_jit_prologue(struct jit_state* state, size_t ubpf_stack_size) { - uint32_t register_space = _countof(callee_saved_registers) * 8 + 2 * 8; - state->stack_size = align_to(ubpf_stack_size + register_space, 16); - emit_addsub_immediate(state, true, AS_SUB, SP, SP, state->stack_size); - - /* Set up frame */ + emit_addsub_immediate(state, true, AS_SUB, SP, SP, 16); emit_loadstorepair_immediate(state, LSP_STPX, R29, R30, SP, 0); - /* In ARM64 calling convention, R29 is the frame pointer. 
*/ - emit_addsub_immediate(state, true, AS_ADD, R29, SP, 0); + state->stack_size = _countof(callee_saved_registers) * 8; + emit_addsub_immediate(state, true, AS_SUB, SP, SP, state->stack_size); /* Save callee saved registers */ unsigned i; for (i = 0; i < _countof(callee_saved_registers); i += 2) { emit_loadstorepair_immediate( - state, LSP_STPX, callee_saved_registers[i], callee_saved_registers[i + 1], SP, (i + 2) * 8); + state, LSP_STPX, callee_saved_registers[i], callee_saved_registers[i + 1], SP, (i) * 8); } + emit_addsub_immediate(state, true, AS_ADD, R29, SP, 0); - /* Setup UBPF frame pointer. */ - emit_addsub_immediate(state, true, AS_ADD, map_register(10), SP, state->stack_size); + if (state->jit_mode == BasicJitMode) { + /* Setup UBPF frame pointer. */ + emit_addsub_immediate(state, true, AS_ADD, map_register(10), SP, 0); + emit_addsub_immediate(state, true, AS_SUB, SP, SP, ubpf_stack_size); + } else { + emit_addsub_immediate(state, true, AS_ADD, map_register(10), R2, 0); + emit_addsub_register(state, true, AS_ADD, map_register(10), map_register(10), R3); + } /* Copy R0 to the volatile context for safe keeping. */ emit_logical_register(state, true, LOG_ORR, VOLATILE_CTXT, RZ, R0); @@ -539,6 +541,33 @@ emit_jit_prologue(struct jit_state* state, size_t ubpf_stack_size) state->entry_loc = state->offset; } +static void +emit_jit_epilogue(struct jit_state* state) +{ + state->exit_loc = state->offset; + + /* Move register 0 into R0 */ + if (map_register(0) != R0) { + emit_logical_register(state, true, LOG_ORR, R0, RZ, map_register(0)); + } + + /* We could be anywhere in the stack if we excepted. Get our head right. */ + emit_addsub_immediate(state, true, AS_ADD, SP, R29, 0); + + /* Restore callee-saved registers). 
*/ + size_t i; + for (i = 0; i < _countof(callee_saved_registers); i += 2) { + emit_loadstorepair_immediate( + state, LSP_LDPX, callee_saved_registers[i], callee_saved_registers[i + 1], SP, (i) * 8); + } + emit_addsub_immediate(state, true, AS_ADD, SP, SP, state->stack_size); + + emit_loadstorepair_immediate(state, LSP_LDPX, R29, R30, SP, 0); + emit_addsub_immediate(state, true, AS_ADD, SP, SP, 16); + + emit_unconditionalbranch_register(state, BR_RET, R30); +} + static void emit_dispatched_external_helper_call(struct jit_state* state, struct ubpf_vm* vm, unsigned int idx) { @@ -602,44 +631,31 @@ emit_dispatched_external_helper_call(struct jit_state* state, struct ubpf_vm* vm static void emit_local_call(struct jit_state* state, uint32_t target_pc) { - uint32_t stack_movement = align_to(40, 16); + emit_loadstore_immediate(state, LS_LDRX, temp_register, SP, 0); + emit_addsub_register(state, true, AS_SUB, map_register(10), map_register(10), temp_register); + + uint32_t stack_movement = align_to(48, 16); emit_addsub_immediate(state, true, AS_SUB, SP, SP, stack_movement); + emit_loadstore_immediate(state, LS_STRX, R30, SP, 0); - emit_loadstorepair_immediate(state, LSP_STPX, map_register(6), map_register(7), SP, 8); - emit_loadstorepair_immediate(state, LSP_STPX, map_register(8), map_register(9), SP, 24); - emit_unconditionalbranch_immediate(state, UBR_BL, target_pc); - emit_loadstore_immediate(state, LS_LDRX, R30, SP, 0); - emit_loadstorepair_immediate(state, LSP_LDPX, map_register(6), map_register(7), SP, 8); - emit_loadstorepair_immediate(state, LSP_LDPX, map_register(8), map_register(9), SP, 24); - emit_addsub_immediate(state, true, AS_ADD, SP, SP, stack_movement); -} + emit_loadstore_immediate(state, LS_STRX, temp_register, SP, 8); + emit_loadstorepair_immediate(state, LSP_STPX, map_register(6), map_register(7), SP, 16); + emit_loadstorepair_immediate(state, LSP_STPX, map_register(8), map_register(9), SP, 32); -static void -emit_jit_epilogue(struct jit_state* state) 
-{ - state->exit_loc = state->offset; + emit_unconditionalbranch_immediate(state, UBR_BL, target_pc); - /* Move register 0 into R0 */ - if (map_register(0) != R0) { - emit_logical_register(state, true, LOG_ORR, R0, RZ, map_register(0)); - } + emit_loadstore_immediate(state, LS_LDRX, R30, SP, 0); + emit_loadstore_immediate(state, LS_LDRX, temp_register, SP, 8); + emit_loadstorepair_immediate(state, LSP_LDPX, map_register(6), map_register(7), SP, 16); + emit_loadstorepair_immediate(state, LSP_LDPX, map_register(8), map_register(9), SP, 32); - /* We could be anywhere in the stack if we excepted. Get our head right. */ - emit_addsub_immediate(state, true, AS_ADD, SP, R29, 0); + emit_addsub_immediate(state, true, AS_ADD, SP, SP, stack_movement); - /* Restore callee-saved registers). */ - size_t i; - for (i = 0; i < _countof(callee_saved_registers); i += 2) { - emit_loadstorepair_immediate( - state, LSP_LDPX, callee_saved_registers[i], callee_saved_registers[i + 1], SP, (i + 2) * 8); - } - emit_loadstorepair_immediate(state, LSP_LDPX, R29, R30, SP, 0); - emit_addsub_immediate(state, true, AS_ADD, SP, SP, state->stack_size); - emit_unconditionalbranch_register(state, BR_RET, R30); + emit_addsub_register(state, true, AS_ADD, map_register(10), map_register(10), temp_register); } static uint32_t -emit_dispatched_external_helper_address(struct jit_state *state, uint64_t dispatcher_addr) +emit_dispatched_external_helper_address(struct jit_state* state, uint64_t dispatcher_addr) { // We will assume that the buffer of memory holding the JIT'd code is 4-byte aligned. // And, because ARM is 32-bit instructions, we know that each instruction is 4-byte aligned. 
@@ -658,10 +674,11 @@ emit_dispatched_external_helper_address(struct jit_state *state, uint64_t dispat } static uint32_t -emit_helper_table(struct jit_state* state, struct ubpf_vm* vm) { +emit_helper_table(struct jit_state* state, struct ubpf_vm* vm) +{ uint32_t helper_table_address_target = state->offset; - for (int i = 0; iext_funcs[i], sizeof(uint64_t)); } return helper_table_address_target; @@ -938,7 +955,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) { int i; - emit_jit_prologue(state, UBPF_STACK_SIZE); + emit_jit_prologue(state, UBPF_EBPF_STACK_SIZE); for (i = 0; i < vm->num_insts; i++) { @@ -951,6 +968,12 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) struct ebpf_inst inst = ubpf_fetch_instruction(vm, i); state->pc_locs[i] = state->offset; + if (i == 0 || vm->int_funcs[i]) { + emit_movewide_immediate(state, true, temp_register, ubpf_stack_usage_for_local_func(vm, i)); + emit_addsub_immediate(state, true, AS_SUB, SP, SP, 16); + emit_loadstorepair_immediate(state, LSP_STPX, temp_register, temp_register, SP, 0); + } + enum Registers dst = map_register(inst.dst); enum Registers src = map_register(inst.src); uint8_t opcode = inst.opcode; @@ -1112,6 +1135,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) } break; case EBPF_OP_EXIT: + emit_addsub_immediate(state, true, AS_ADD, SP, SP, 16); emit_unconditionalbranch_register(state, BR_RET, R30); break; @@ -1177,43 +1201,42 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) if (state->jit_status != NoError) { switch (state->jit_status) { - case TooManyJumps: { - *errmsg = ubpf_error("Too many jump instructions."); - break; - } - case TooManyLoads: { - *errmsg = ubpf_error("Too many load instructions."); - break; - } - case TooManyLeas: { - *errmsg = ubpf_error("Too many LEA calculations."); - break; - } - case UnexpectedInstruction: { - // errmsg set at time the error was detected because the message requires - // 
information about the unexpected instruction. - break; - } - case UnknownInstruction: { - // errmsg set at time the error was detected because the message requires - // information about the unknown instruction. - break; - } - case NotEnoughSpace: { - *errmsg = ubpf_error("Target buffer too small"); - break; - } - case NoError: { - assert(false); - } + case TooManyJumps: { + *errmsg = ubpf_error("Too many jump instructions."); + break; + } + case TooManyLoads: { + *errmsg = ubpf_error("Too many load instructions."); + break; + } + case TooManyLeas: { + *errmsg = ubpf_error("Too many LEA calculations."); + break; + } + case UnexpectedInstruction: { + // errmsg set at time the error was detected because the message requires + // information about the unexpected instruction. + break; + } + case UnknownInstruction: { + // errmsg set at time the error was detected because the message requires + // information about the unknown instruction. + break; + } + case NotEnoughSpace: { + *errmsg = ubpf_error("Target buffer too small"); + break; + } + case NoError: { + assert(false); + } } return -1; } - emit_jit_epilogue(state); - state->dispatcher_loc = emit_dispatched_external_helper_address(state, (uint64_t)vm->dispatcher); + state->dispatcher_loc = emit_dispatched_external_helper_address(state, (uint64_t)vm->dispatcher); state->helper_table_loc = emit_helper_table(state, vm); return 0; @@ -1277,7 +1300,6 @@ resolve_adr(struct jit_state* state, uint32_t instr_offset, int32_t immediate) memcpy(state->buf + instr_offset, &instr, sizeof(uint32_t)); } - static bool resolve_jumps(struct jit_state* state) { @@ -1345,12 +1367,13 @@ resolve_leas(struct jit_state* state) return true; } - -bool ubpf_jit_update_dispatcher_arm64(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset) +bool +ubpf_jit_update_dispatcher_arm64( + struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t 
offset) { UNUSED_PARAMETER(vm); uint64_t jit_upper_bound = (uint64_t)buffer + size; - void *dispatcher_address = (void*)((uint64_t)buffer + offset); + void* dispatcher_address = (void*)((uint64_t)buffer + offset); if ((uint64_t)dispatcher_address + sizeof(void*) < jit_upper_bound) { memcpy(dispatcher_address, &new_dispatcher, sizeof(void*)); return true; @@ -1359,7 +1382,9 @@ bool ubpf_jit_update_dispatcher_arm64(struct ubpf_vm* vm, external_function_disp return false; } -bool ubpf_jit_update_helper_arm64(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset) +bool +ubpf_jit_update_helper_arm64( + struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset) { UNUSED_PARAMETER(vm); uint64_t jit_upper_bound = (uint64_t)buffer + size; @@ -1373,12 +1398,12 @@ bool ubpf_jit_update_helper_arm64(struct ubpf_vm* vm, ext_func new_helper, unsig } struct ubpf_jit_result -ubpf_translate_arm64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size) +ubpf_translate_arm64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode) { struct jit_state state; struct ubpf_jit_result compile_result; - if (initialize_jit_state_result(&state, &compile_result, buffer, *size, &compile_result.errmsg) < 0) { + if (initialize_jit_state_result(&state, &compile_result, buffer, *size, jit_mode, &compile_result.errmsg) < 0) { goto out; } diff --git a/vm/ubpf_jit_support.c b/vm/ubpf_jit_support.c index ba96b9be0..c1f934257 100644 --- a/vm/ubpf_jit_support.c +++ b/vm/ubpf_jit_support.c @@ -19,13 +19,22 @@ #include "ubpf_jit_support.h" #include +#include "ubpf.h" #include "ubpf_int.h" int -initialize_jit_state_result(struct jit_state *state, struct ubpf_jit_result *compile_result, uint8_t *buffer, uint32_t size, char **errmsg) { +initialize_jit_state_result( + struct jit_state* state, + struct ubpf_jit_result* compile_result, + uint8_t* buffer, + uint32_t size, + enum JitMode jit_mode, + char** 
errmsg) +{ compile_result->compile_result = UBPF_JIT_COMPILE_FAILURE; compile_result->errmsg = NULL; compile_result->external_dispatcher_offset = 0; + compile_result->jit_mode = jit_mode; state->offset = 0; state->size = size; @@ -38,6 +47,7 @@ initialize_jit_state_result(struct jit_state *state, struct ubpf_jit_result *com state->num_loads = 0; state->num_leas = 0; state->jit_status = NoError; + state->jit_mode = jit_mode; if (!state->pc_locs || !state->jumps || !state->loads || !state->leas) { *errmsg = ubpf_error("Could not allocate space needed to JIT compile eBPF program"); @@ -48,7 +58,7 @@ initialize_jit_state_result(struct jit_state *state, struct ubpf_jit_result *com } void -release_jit_state_result(struct jit_state *state, struct ubpf_jit_result *compile_result) +release_jit_state_result(struct jit_state* state, struct ubpf_jit_result* compile_result) { UNUSED_PARAMETER(compile_result); free(state->pc_locs); @@ -62,7 +72,8 @@ release_jit_state_result(struct jit_state *state, struct ubpf_jit_result *compil } void -emit_patchable_relative(uint32_t offset, uint32_t target_pc, uint32_t manual_target_offset, struct patchable_relative *table, size_t index) +emit_patchable_relative( + uint32_t offset, uint32_t target_pc, uint32_t manual_target_offset, struct patchable_relative* table, size_t index) { struct patchable_relative* jump = &table[index]; jump->offset_loc = offset; @@ -82,9 +93,8 @@ note_lea(struct jit_state* state, uint32_t offset) emit_patchable_relative(state->offset, offset, 0, state->leas, state->num_leas++); } - void -fixup_jump_target(struct patchable_relative *table, size_t table_size, uint32_t src_offset, uint32_t dest_offset) +fixup_jump_target(struct patchable_relative* table, size_t table_size, uint32_t src_offset, uint32_t dest_offset) { for (size_t index = 0; index < table_size; index++) { if (table[index].offset_loc == src_offset) { @@ -97,4 +107,4 @@ void emit_jump_target(struct jit_state* state, uint32_t jump_src) { 
fixup_jump_target(state->jumps, state->num_jumps, jump_src, state->offset); -} \ No newline at end of file +} diff --git a/vm/ubpf_jit_support.h b/vm/ubpf_jit_support.h index 197389752..659bcc85a 100644 --- a/vm/ubpf_jit_support.h +++ b/vm/ubpf_jit_support.h @@ -28,7 +28,8 @@ #include #include "ubpf_int.h" -enum JitProgress { +enum JitProgress +{ NoError, TooManyJumps, TooManyLoads, @@ -51,10 +52,10 @@ struct patchable_relative /* Special values for target_pc in struct jump */ #define TARGET_PC_EXIT ~UINT32_C(0) -#define TARGET_PC_ENTER (~UINT32_C(0) & 0x01) -#define TARGET_PC_RETPOLINE (~UINT32_C(0) & 0x0101) +#define TARGET_PC_ENTER (~UINT32_C(0) & 0x01) +#define TARGET_PC_RETPOLINE (~UINT32_C(0) & 0x0101) #define TARGET_PC_EXTERNAL_DISPATCHER (~UINT32_C(0) & 0x010101) -#define TARGET_LOAD_HELPER_TABLE (~UINT32_C(0) & 0x01010101) +#define TARGET_LOAD_HELPER_TABLE (~UINT32_C(0) & 0x01010101) struct jit_state { @@ -85,6 +86,7 @@ struct jit_state */ uint32_t helper_table_loc; enum JitProgress jit_status; + enum JitMode jit_mode; struct patchable_relative* jumps; struct patchable_relative* loads; struct patchable_relative* leas; @@ -95,13 +97,20 @@ struct jit_state }; int -initialize_jit_state_result(struct jit_state *state, struct ubpf_jit_result *compile_result, uint8_t *buffer, uint32_t size, char **errmsg); +initialize_jit_state_result( + struct jit_state* state, + struct ubpf_jit_result* compile_result, + uint8_t* buffer, + uint32_t size, + enum JitMode jit_mode, + char** errmsg); void -release_jit_state_result(struct jit_state *state, struct ubpf_jit_result *compile_result); +release_jit_state_result(struct jit_state* state, struct ubpf_jit_result* compile_result); void -emit_patchable_relative(uint32_t offset, uint32_t target_pc, uint32_t manual_target_offset, struct patchable_relative *table, size_t index); +emit_patchable_relative( + uint32_t offset, uint32_t target_pc, uint32_t manual_target_offset, struct patchable_relative* table, size_t index); void 
note_load(struct jit_state* state, uint32_t target_pc); @@ -113,5 +122,5 @@ void emit_jump_target(struct jit_state* state, uint32_t jump_src); void -fixup_jump_target(struct patchable_relative *table, size_t table_size, uint32_t src_offset, uint32_t dest_offset); +fixup_jump_target(struct patchable_relative* table, size_t table_size, uint32_t src_offset, uint32_t dest_offset); #endif diff --git a/vm/ubpf_jit_x86_64.c b/vm/ubpf_jit_x86_64.c index 467d6daab..00bd1094b 100644 --- a/vm/ubpf_jit_x86_64.c +++ b/vm/ubpf_jit_x86_64.c @@ -18,6 +18,7 @@ * limitations under the License. */ +#include "ubpf.h" #include "ubpf_jit_support.h" #define _GNU_SOURCE @@ -63,36 +64,43 @@ muldivmod(struct jit_state* state, uint8_t opcode, int src, int dst, int32_t imm #define RCX_ALT R10 #if defined(_WIN32) -static int platform_nonvolatile_registers[] = {RBP, RBX, RDI, RSI, R13, R14, R15}; +static int platform_nonvolatile_registers[] = {RBP, RBX, RDI, RSI, R12, R13, R14, R15}; // Callee-saved registers. +static int platform_volatile_registers[] = {RAX, RDX, RCX, R8, R9, R10, R11}; // Caller-saved registers (if needed). static int platform_parameter_registers[] = {RCX, RDX, R8, R9}; static int register_map[REGISTER_MAP_SIZE] = { + // Scratch registers RAX, R10, RDX, R8, R9, - R14, - R15, + R12, + // Non-volatile registers + RBX, RDI, RSI, - RBX, - RBP, + R14, + R15, // Until further notice, r15 must be mapped to eBPF register r10 }; #else -static int platform_nonvolatile_registers[] = {RBP, RBX, R13, R14, R15}; +static int platform_nonvolatile_registers[] = {RBP, RBX, R12, R13, R14, R15}; // Callee-saved registers. +static int platform_volatile_registers[] = { + RAX, RDI, RSI, RDX, RCX, R8, R9, R10, R11}; // Caller-saved registers (if needed). 
static int platform_parameter_registers[] = {RDI, RSI, RDX, RCX, R8, R9}; static int register_map[REGISTER_MAP_SIZE] = { + // Scratch registers RAX, RDI, RSI, RDX, R10, R8, + // Non-volatile registers RBX, + R12, R13, R14, - R15, - RBP, + R15, // Until further notice, r15 must be mapped to eBPF register r10 }; #endif @@ -105,16 +113,22 @@ map_register(int r) } static inline void -emit_local_call(struct jit_state* state, uint32_t target_pc) +emit_local_call(struct ubpf_vm* vm, struct jit_state* state, uint32_t target_pc) { - /* - * Pushing 4 * 8 = 32 bytes will maintain the invariant - * that the stack is 16-byte aligned. - */ + UNUSED_PARAMETER(vm); + // Because the top of the stack holds the stack usage of the calling function, + // we adjust the base pointer down by that value! + // sub r15, [rsp] + emit1(state, 0x4c); + emit1(state, 0x2B); + emit1(state, 0x3C); // Mod: 00b Reg: 111b RM: 100b + emit1(state, 0x24); // Scale: 00b Index: 100b Base: 100b + emit_push(state, map_register(BPF_REG_6)); emit_push(state, map_register(BPF_REG_7)); emit_push(state, map_register(BPF_REG_8)); emit_push(state, map_register(BPF_REG_9)); + #if defined(_WIN32) /* Windows x64 ABI requires home register space */ /* Allocate home register space - 4 registers */ @@ -122,6 +136,7 @@ emit_local_call(struct jit_state* state, uint32_t target_pc) #endif emit1(state, 0xe8); // e8 is the opcode for a CALL emit_jump_address_reloc(state, target_pc); + #if defined(_WIN32) /* Deallocate home register space - 4 registers */ emit_alu64_imm32(state, 0x81, 0, RSP, 4 * sizeof(uint64_t)); @@ -130,12 +145,19 @@ emit_local_call(struct jit_state* state, uint32_t target_pc) emit_pop(state, map_register(BPF_REG_8)); emit_pop(state, map_register(BPF_REG_7)); emit_pop(state, map_register(BPF_REG_6)); + + // Because the top of the stack holds the stack usage of the calling function, + // we adjust the base pointer back up by that value! 
+ // add r15, [rsp] + emit1(state, 0x4c); + emit1(state, 0x03); + emit1(state, 0x3C); // Mod: 00b Reg: 111b RM: 100b + emit1(state, 0x24); // Scale: 00b Index: 100b Base: 100b } static uint32_t emit_dispatched_external_helper_address(struct jit_state* state, struct ubpf_vm* vm) { - uint32_t external_helper_address_target = state->offset; emit8(state, (uint64_t)vm->dispatcher); return external_helper_address_target; @@ -237,6 +259,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) { int i; + (void)platform_volatile_registers; /* Save platform non-volatile registers */ for (i = 0; i < _countof(platform_nonvolatile_registers); i++) { emit_push(state, platform_nonvolatile_registers[i]); @@ -247,7 +270,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) emit_mov(state, platform_parameter_registers[0], map_register(BPF_REG_1)); } - /* Move the platform parameter register to the (volatile) register + /* Move the first platform parameter register to the (volatile) register * that holds the pointer to the context. */ emit_mov(state, platform_parameter_registers[0], VOLATILE_CTXT); @@ -256,7 +279,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) * Assuming that the stack is 16-byte aligned right before * the call insn that brought us to this code, when * we start executing the jit'd code, we need to regain a 16-byte - * alignment. The UBPF_STACK_SIZE is guaranteed to be + * alignment. The UBPF_EBPF_STACK_SIZE is guaranteed to be * divisible by 16. However, if we pushed an even number of * registers on the stack when we are saving state (see above), * then we have to add an additional 8 bytes to get back @@ -267,12 +290,24 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) } /* - * Set BPF R10 (the way to access the frame in eBPF) to match RSP. + * Let's set RBP to RSP so that we can restore RSP later! 
*/ - emit_mov(state, RSP, map_register(BPF_REG_10)); - - /* Allocate stack space */ - emit_alu64_imm32(state, 0x81, 5, RSP, UBPF_STACK_SIZE); + emit_mov(state, RSP, RBP); + + /* Configure eBPF program stack space */ + if (state->jit_mode == BasicJitMode) { + /* + * Set BPF R10 (the way to access the frame in eBPF) the beginning + * of the eBPF program's stack space. + */ + emit_mov(state, RSP, map_register(BPF_REG_10)); + /* Allocate eBPF program stack space */ + emit_alu64_imm32(state, 0x81, 5, RSP, UBPF_EBPF_STACK_SIZE); + } else { + /* Use given eBPF program stack space */ + emit_mov(state, platform_parameter_registers[2], map_register(BPF_REG_10)); + emit_alu64(state, 0x01, platform_parameter_registers[3], map_register(BPF_REG_10)); + } #if defined(_WIN32) /* Windows x64 ABI requires home register space */ @@ -308,10 +343,13 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) uint32_t target_pc = i + inst.offset + 1; if (i == 0 || vm->int_funcs[i]) { - /* When we are the subject of a call, we have to properly align our - * stack pointer. - */ + uint16_t stack_usage = ubpf_stack_usage_for_local_func(vm, i); emit_alu64_imm32(state, 0x81, 5, RSP, 8); + emit1(state, 0x48); + emit1(state, 0xC7); + emit1(state, 0x04); // Mod: 00b Reg: 000b RM: 100b + emit1(state, 0x24); // Scale: 00b Index: 100b Base: 100b + emit4(state, stack_usage); } switch (inst.opcode) { @@ -662,7 +700,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) } } else if (inst.src == 1) { target_pc = i + inst.imm + 1; - emit_local_call(state, target_pc); + emit_local_call(vm, state, target_pc); } break; case EBPF_OP_EXIT: @@ -768,8 +806,8 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) emit_mov(state, map_register(BPF_REG_0), RAX); } - /* Deallocate stack space by restoring RSP from BPF R10. */ - emit_mov(state, map_register(BPF_REG_10), RSP); + /* Deallocate stack space by restoring RSP from RBP. 
*/ + emit_mov(state, RBP, RSP); if (!(_countof(platform_nonvolatile_registers) % 2)) { emit_alu64_imm32(state, 0x81, 0, RSP, 0x8); @@ -977,12 +1015,12 @@ resolve_patchable_relatives(struct jit_state* state) } struct ubpf_jit_result -ubpf_translate_x86_64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size) +ubpf_translate_x86_64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode) { struct jit_state state; struct ubpf_jit_result compile_result; - if (initialize_jit_state_result(&state, &compile_result, buffer, *size, &compile_result.errmsg) < 0) { + if (initialize_jit_state_result(&state, &compile_result, buffer, *size, jit_mode, &compile_result.errmsg) < 0) { goto out; } @@ -998,6 +1036,7 @@ ubpf_translate_x86_64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size) compile_result.compile_result = UBPF_JIT_COMPILE_SUCCESS; compile_result.external_dispatcher_offset = state.dispatcher_loc; compile_result.external_helper_offset = state.helper_table_loc; + compile_result.jit_mode = jit_mode; *size = state.offset; out: diff --git a/vm/ubpf_vm.c b/vm/ubpf_vm.c index e2d71ae00..41d05e346 100644 --- a/vm/ubpf_vm.c +++ b/vm/ubpf_vm.c @@ -45,7 +45,8 @@ bounds_check( uint16_t cur_pc, void* mem, size_t mem_len, - void* stack); + void* stack, + size_t stack_len); bool ubpf_toggle_bounds_check(struct ubpf_vm* vm, bool enable) @@ -65,7 +66,14 @@ ubpf_set_error_print(struct ubpf_vm* vm, int (*error_printf)(FILE* stream, const } static uint64_t -ubpf_default_external_dispatcher(uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, unsigned int index, external_function_t *external_fns) +ubpf_default_external_dispatcher( + uint64_t arg1, + uint64_t arg2, + uint64_t arg3, + uint64_t arg4, + uint64_t arg5, + unsigned int index, + external_function_t* external_fns) { return external_fns[index](arg1, arg2, arg3, arg4, arg5); } @@ -90,6 +98,12 @@ ubpf_create(void) return NULL; } + vm->local_func_stack_usage = calloc(UBPF_MAX_INSTS, sizeof(struct 
ubpf_stack_usage)); + if (vm->local_func_stack_usage == NULL) { + ubpf_destroy(vm); + return NULL; + } + vm->bounds_check_enabled = true; vm->error_printf = fprintf; @@ -118,6 +132,7 @@ ubpf_destroy(struct ubpf_vm* vm) free(vm->int_funcs); free(vm->ext_funcs); free(vm->ext_func_names); + free(vm->local_func_stack_usage); free(vm); } @@ -127,7 +142,6 @@ as_external_function_t(void* f) return (external_function_t)f; }; - int ubpf_register(struct ubpf_vm* vm, unsigned int idx, const char* name, external_function_t fn) { @@ -146,7 +160,8 @@ ubpf_register(struct ubpf_vm* vm, unsigned int idx, const char* name, external_f } // Now, update! - if (!vm->jit_update_helper(vm, fn, idx, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_helper_offset)) { + if (!vm->jit_update_helper( + vm, fn, idx, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_helper_offset)) { // Can't immediately stop here because we have unprotected memory! success = -1; } @@ -173,7 +188,8 @@ ubpf_register_external_dispatcher( } // Now, update! - if (!vm->jit_update_dispatcher(vm, dispatcher, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_dispatcher_offset)) { + if (!vm->jit_update_dispatcher( + vm, dispatcher, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_dispatcher_offset)) { // Can't immediately stop here because we have unprotected memory! success = -1; } @@ -215,8 +231,8 @@ ubpf_load(struct ubpf_vm* vm, const void* code, uint32_t code_len, char** errmsg const struct ebpf_inst* source_inst = code; *errmsg = NULL; - if (UBPF_STACK_SIZE % sizeof(uint64_t) != 0) { - *errmsg = ubpf_error("UBPF_STACK_SIZE must be a multiple of 8"); + if (UBPF_EBPF_STACK_SIZE % sizeof(uint64_t) != 0) { + *errmsg = ubpf_error("UBPF_EBPF_STACK_SIZE must be a multiple of 8"); return -1; } @@ -299,12 +315,13 @@ i32(uint64_t x) * @param[in] immediate The signed 32-bit immediate value to sign extend. * @return The sign extended 64-bit value. 
*/ -static int64_t i64(int32_t immediate) { +static int64_t +i64(int32_t immediate) +{ return (int64_t)immediate; - } -#define IS_ALIGNED(x, a) (((uintptr_t)(x) & ((a)-1)) == 0) +#define IS_ALIGNED(x, a) (((uintptr_t)(x) & ((a) - 1)) == 0) inline static uint64_t ubpf_mem_load(uint64_t address, size_t size) @@ -357,44 +374,30 @@ ubpf_mem_store(uint64_t address, uint64_t value, size_t size) } int -ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_return_value) +ubpf_exec_ex( + const struct ubpf_vm* vm, + void* mem, + size_t mem_len, + uint64_t* bpf_return_value, + uint8_t* stack_start, + size_t stack_length) { uint16_t pc = 0; const struct ebpf_inst* insts = vm->insts; uint64_t* reg; uint64_t _reg[16]; - uint64_t ras_index = 0; + uint64_t stack_frame_index = 0; int return_value = -1; - void *external_dispatcher_cookie = mem; - -// Windows Kernel mode limits stack usage to 12K, so we need to allocate it dynamically. -#if defined(NTDDI_VERSION) && defined(WINNT) - uint64_t* stack = NULL; - struct ubpf_stack_frame* stack_frames = NULL; + void* external_dispatcher_cookie = mem; - stack = calloc(UBPF_STACK_SIZE, 1); - if (!stack) { - return_value = -1; - goto cleanup; - } - - stack_frames = calloc(UBPF_MAX_CALL_DEPTH, sizeof(struct ubpf_stack_frame)); - if (!stack_frames) { - return_value = -1; - goto cleanup; + if (!insts) { + /* Code must be loaded before we can execute */ + return -1; } -#else - uint64_t stack[UBPF_STACK_SIZE / sizeof(uint64_t)]; struct ubpf_stack_frame stack_frames[UBPF_MAX_CALL_DEPTH] = { 0, }; -#endif - - if (!insts) { - /* Code must be loaded before we can execute */ - return -1; - } #ifdef DEBUG if (vm->regs) @@ -407,7 +410,7 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret reg[1] = (uintptr_t)mem; reg[2] = (uint64_t)mem_len; - reg[10] = (uintptr_t)stack + UBPF_STACK_SIZE; + reg[10] = (uintptr_t)stack_start + stack_length; int instruction_limit = vm->instruction_limit; @@ -421,6 +424,11 
@@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret return_value = -1; goto cleanup; } + + if (pc == 0 || vm->int_funcs[pc]) { + stack_frames[stack_frame_index].stack_usage = ubpf_stack_usage_for_local_func(vm, pc); + } + struct ebpf_inst inst = ubpf_fetch_instruction(vm, pc++); switch (inst.opcode) { @@ -622,19 +630,37 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret * * Needed since we don't have a verifier yet. */ -#define BOUNDS_CHECK_LOAD(size) \ - do { \ - if (!bounds_check(vm, (char*)reg[inst.src] + inst.offset, size, "load", cur_pc, mem, mem_len, stack)) { \ - return_value = -1; \ - goto cleanup; \ - } \ +#define BOUNDS_CHECK_LOAD(size) \ + do { \ + if (!bounds_check( \ + vm, \ + (char*)reg[inst.src] + inst.offset, \ + size, \ + "load", \ + cur_pc, \ + mem, \ + mem_len, \ + stack_start, \ + stack_length)) { \ + return_value = -1; \ + goto cleanup; \ + } \ } while (0) -#define BOUNDS_CHECK_STORE(size) \ - do { \ - if (!bounds_check(vm, (char*)reg[inst.dst] + inst.offset, size, "store", cur_pc, mem, mem_len, stack)) { \ - return_value = -1; \ - goto cleanup; \ - } \ +#define BOUNDS_CHECK_STORE(size) \ + do { \ + if (!bounds_check( \ + vm, \ + (char*)reg[inst.dst] + inst.offset, \ + size, \ + "store", \ + cur_pc, \ + mem, \ + mem_len, \ + stack_start, \ + stack_length)) { \ + return_value = -1; \ + goto cleanup; \ + } \ } while (0) case EBPF_OP_LDXW: @@ -916,13 +942,14 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret } break; case EBPF_OP_EXIT: - if (ras_index > 0) { - ras_index--; - pc = stack_frames[ras_index].return_address; - reg[BPF_REG_6] = stack_frames[ras_index].saved_registers[0]; - reg[BPF_REG_7] = stack_frames[ras_index].saved_registers[1]; - reg[BPF_REG_8] = stack_frames[ras_index].saved_registers[2]; - reg[BPF_REG_9] = stack_frames[ras_index].saved_registers[3]; + if (stack_frame_index > 0) { + stack_frame_index--; + pc = 
stack_frames[stack_frame_index].return_address; + reg[BPF_REG_6] = stack_frames[stack_frame_index].saved_registers[0]; + reg[BPF_REG_7] = stack_frames[stack_frame_index].saved_registers[1]; + reg[BPF_REG_8] = stack_frames[stack_frame_index].saved_registers[2]; + reg[BPF_REG_9] = stack_frames[stack_frame_index].saved_registers[3]; + reg[BPF_REG_10] += stack_frames[stack_frame_index].stack_usage; break; } *bpf_return_value = reg[0]; @@ -934,9 +961,11 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret if (inst.src == 0) { // Handle call by address to external function. if (vm->dispatcher != NULL) { - reg[0] = vm->dispatcher(reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, external_dispatcher_cookie); + reg[0] = + vm->dispatcher(reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, external_dispatcher_cookie); } else { - reg[0] = ubpf_default_external_dispatcher(reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, vm->ext_funcs); + reg[0] = ubpf_default_external_dispatcher( + reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, vm->ext_funcs); } if (inst.imm == vm->unwind_stack_extension_index && reg[0] == 0) { *bpf_return_value = reg[0]; @@ -944,22 +973,25 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret goto cleanup; } } else if (inst.src == 1) { - if (ras_index >= UBPF_MAX_CALL_DEPTH) { + if (stack_frame_index >= UBPF_MAX_CALL_DEPTH) { vm->error_printf( stderr, "uBPF error: number of nested functions calls (%lu) exceeds max (%lu) at PC %u\n", - ras_index + 1, + stack_frame_index + 1, UBPF_MAX_CALL_DEPTH, cur_pc); return_value = -1; goto cleanup; } - stack_frames[ras_index].saved_registers[0] = reg[BPF_REG_6]; - stack_frames[ras_index].saved_registers[1] = reg[BPF_REG_7]; - stack_frames[ras_index].saved_registers[2] = reg[BPF_REG_8]; - stack_frames[ras_index].saved_registers[3] = reg[BPF_REG_9]; - stack_frames[ras_index].return_address = pc; - ras_index++; + stack_frames[stack_frame_index].saved_registers[0] 
= reg[BPF_REG_6]; + stack_frames[stack_frame_index].saved_registers[1] = reg[BPF_REG_7]; + stack_frames[stack_frame_index].saved_registers[2] = reg[BPF_REG_8]; + stack_frames[stack_frame_index].saved_registers[3] = reg[BPF_REG_9]; + stack_frames[stack_frame_index].return_address = pc; + + reg[BPF_REG_10] -= stack_frames[stack_frame_index].stack_usage; + + stack_frame_index++; pc += inst.imm; break; } else if (inst.src == 2) { @@ -976,11 +1008,32 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret cleanup: #if defined(NTDDI_VERSION) && defined(WINNT) free(stack_frames); - free(stack); #endif return return_value; } +int +ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_return_value) +{ +// Windows Kernel mode limits stack usage to 12K, so we need to allocate it dynamically. +#if defined(NTDDI_VERSION) && defined(WINNT) + uint64_t* stack = NULL; + struct ubpf_stack_frame* stack_frames = NULL; + + stack = calloc(UBPF_EBPF_STACK_SIZE, 1); + if (!stack) { + return -1; + } +#else + uint64_t stack[UBPF_EBPF_STACK_SIZE / sizeof(uint64_t)]; +#endif + int result = ubpf_exec_ex(vm, mem, mem_len, bpf_return_value, (uint8_t*)stack, UBPF_EBPF_STACK_SIZE); +#if defined(NTDDI_VERSION) && defined(WINNT) + free(stack); +#endif + return result; +} + static bool validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_insts, char** errmsg) { @@ -989,6 +1042,10 @@ validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_i return false; } + if (!ubpf_calculate_stack_usage_for_local_func(vm, 0, errmsg)) { + return false; + } + int i; for (i = 0; i < num_insts; i++) { struct ebpf_inst inst = insts[i]; @@ -1047,8 +1104,11 @@ validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_i case EBPF_OP_MOD64_REG: case EBPF_OP_XOR64_IMM: case EBPF_OP_XOR64_REG: + break; case EBPF_OP_MOV64_IMM: case EBPF_OP_MOV64_REG: + store = true; + break; case EBPF_OP_ARSH64_IMM: case 
EBPF_OP_ARSH64_REG: break; @@ -1159,6 +1219,9 @@ validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_i ubpf_error("call to local function (at PC %d) is out of bounds (target: %d)", i, call_target); return false; } + if (!ubpf_calculate_stack_usage_for_local_func(vm, call_target, errmsg)) { + return false; + } } else if (inst.src == 2) { *errmsg = ubpf_error("call to external function by BTF ID (at PC %d) is not supported", i); return false; @@ -1205,15 +1268,16 @@ bounds_check( uint16_t cur_pc, void* mem, size_t mem_len, - void* stack) + void* stack, + size_t stack_len) { if (!vm->bounds_check_enabled) return true; - uintptr_t access_start= (uintptr_t)addr; + uintptr_t access_start = (uintptr_t)addr; uintptr_t access_end = access_start + size; uintptr_t stack_start = (uintptr_t)stack; - uintptr_t stack_end = stack_start + UBPF_STACK_SIZE; + uintptr_t stack_end = stack_start + stack_len; uintptr_t mem_start = (uintptr_t)mem; uintptr_t mem_end = mem_start + mem_len; @@ -1223,12 +1287,7 @@ bounds_check( if (access_start > access_end) { vm->error_printf( - stderr, - "uBPF error: invalid memory access %s at PC %u, addr %p, size %d\n", - type, - cur_pc, - addr, - size); + stderr, "uBPF error: invalid memory access %s at PC %u, addr %p, size %d\n", type, cur_pc, addr, size); return false; } @@ -1249,7 +1308,8 @@ bounds_check( // The address may be invalid or it may be a region of memory that the caller // is aware of but that is not part of the stack or memory. // Call any registered bounds check function to determine if the access is valid. 
- if (vm->bounds_check_function != NULL && vm->bounds_check_function(vm->bounds_check_user_data, access_start, size)) { + if (vm->bounds_check_function != NULL && + vm->bounds_check_function(vm->bounds_check_user_data, access_start, size)) { return true; } @@ -1266,7 +1326,7 @@ bounds_check( mem, mem_len, stack, - UBPF_STACK_SIZE); + UBPF_EBPF_STACK_SIZE); return false; } @@ -1388,3 +1448,45 @@ ubpf_set_instruction_limit(struct ubpf_vm* vm, uint32_t limit, uint32_t* previou vm->instruction_limit = limit; return 0; } + +bool +ubpf_calculate_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc, char** errmsg) +{ + // If there is a stack usage calculator and we have not invoked it before for the target, + // then now is the time to call it! + if (vm->stack_usage_calculator && !vm->local_func_stack_usage[pc].stack_usage_calculated) { + uint16_t stack_usage = (vm->stack_usage_calculator)(vm, pc, vm->stack_usage_calculator_cookie); + vm->local_func_stack_usage[pc].stack_usage = stack_usage; + } + vm->local_func_stack_usage[pc].stack_usage_calculated = true; + // Now that we are guaranteed to have a value for the amount of the stack used by the function + // starting at call_target, let's make sure that it is 16-byte aligned. Note: The amount of stack + // used might be 0 (in the case where there is no registered stack usage calculator callback). That + // is okay because ubpf_stack_usage_for_local_func will give us a meaningful default. 
+ if (vm->local_func_stack_usage[pc].stack_usage % 16) { + *errmsg = ubpf_error( + "local function (at PC %d) has improperly sized stack use (%d)", + pc, + vm->local_func_stack_usage[pc].stack_usage); + return false; + } + return true; +} + +uint16_t +ubpf_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc) +{ + uint16_t stack_usage = UBPF_EBPF_STACK_SIZE; + if (vm->local_func_stack_usage[pc].stack_usage_calculated) { + stack_usage = vm->local_func_stack_usage[pc].stack_usage; + } + return stack_usage; +} + +int +ubpf_register_stack_usage_calculator(struct ubpf_vm* vm, stack_usage_calculator_t calculator, void* cookie) +{ + vm->stack_usage_calculator_cookie = cookie; + vm->stack_usage_calculator = calculator; + return 0; +}