Skip to content

Commit

Permalink
[PAL/Linux-SGX] Add AEX-Notify flows in exception handling
Browse files Browse the repository at this point in the history
This commit adds the AEX-Notify flows inside the enclave.

The stage-1 signal handler is augmented as follows when AEX-Notify is
enabled: manually restore SSA[0] context, invoke the EDECCSSA
instruction instead of EEXIT (to go from SSA[1] to SSA[0] without
exiting the enclave) and finally jump to SSA[0].GPRSGX.RIP to resume
enclave execution (it will resume in stage-2 signal handler).

The stage-2 signal handler is augmented as follows: set bit 0 of
SSA[0].GPRSGX.AEXNOTIFY (so that AEX-Notify starts working again for
this thread), then apply AEX-Notify mitigations and finally restore
regular enclave execution.

This commit does not add any real AEX-Notify mitigations. Instead, we
count the number of AEX events reported inside the SGX enclave and print
this number on enclave termination (if log level is at least "warning").

Note that the current implementation of AEX-Notify does not use the
checkpoint mechanism described in the official AEX-Notify whitepaper.
That checkpoint mechanism allows coalescing multiple AEX events
that occur during the execution of mitigations. This saves some CPU
cycles and some signal-handling stack space, but we leave implementing
this optimization as future work.

Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
  • Loading branch information
Dmitrii Kuvaiskii committed Oct 16, 2024
1 parent 45f12b3 commit 5a8651c
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 8 deletions.
78 changes: 77 additions & 1 deletion pal/src/host/linux-sgx/enclave_entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,12 @@
# Its sole purpose is to prepare the stage-2 handler by:
# - copying the interrupted SSA[0] context on the stack ("CPU context")
# - rewiring the SSA[0] context to point to _PalExceptionHandler()
# - invoking EEXIT (so that untrusted runtime can perform ERESUME)
# - handing over control to the SSA[0] context, in one of two ways:
# - AEX-Notify disabled or unavailable (legacy default flows): invoke EEXIT (so that untrusted
# runtime can perform ERESUME which will resume enclave execution in the SSA[0] context)
# - AEX-Notify enabled (explicit opt-in flows): manually restore SSA[0] context, invoke
# EDECCSSA instruction to go from SSA[1] to SSA[0] without exiting the enclave, and jump to
# SSA[0].GPRSGX.RIP to resume enclave execution

#include "sgx_arch.h"
#include "asm-offsets.h"
Expand Down Expand Up @@ -544,6 +549,13 @@ enclave_entry:
movq %r10, %rdx

.Lcssa1_exception_eexit:
movzbq g_aex_notify_enabled(%rip), %r10
cmpq $0, %r10
jne .Lcssa1_exception_eexit_aexnotify

.Lcssa1_exception_eexit_legacy:
# AEX-Notify is disabled/unavailable, stage-1 exception handler follows the normal EEXIT flow

# .Lcssa0_ocall_or_cssa1_exception_eexit has an ABI that uses RSI, RDI, RSP; clear the relevant
# regs (note that stage-1 handler didn't clobber RSP -- which contains an untrusted pointer to
# untrusted-runtime stack -- but this flow doesn't read/write RSP at all so there is no need to
Expand All @@ -555,6 +567,70 @@ enclave_entry:
movq %rdx, %rbx
jmp .Lcssa0_ocall_or_cssa1_exception_eexit

.Lcssa1_exception_eexit_aexnotify:
# AEX-Notify is enabled, stage-1 exception handler doesn't invoke EEXIT but instead the new
# EDECCSSA instruction (before this, need to manually restore SSA[0] context)
#
# Register usage in this section (as visible in the code below):
#   - RBX is the base for all SGX_GPR_* accesses, i.e. it addresses the SSA[0] GPR area
#   - R11 carries the final jump target
#   - RAX is loaded with the EDECCSSA leaf number for ENCLU
# Consequently RAX, RBX and R11 are NOT restored from SSA[0]: the code we jump to observes
# RAX = EDECCSSA, RBX = 0 and R11 = its own address.

# After restoring SSA[0] context into GPRs, we'll need to jump to the stage-2 handler, so
# memorize SSA[0].GPRSGX.RIP in an otherwise-unused R11. There is a corner case of the stage-1
# handler's final jmp instruction, see comment at .Lcssa1_exception_eexit_aexnotify_finaljmp.
leaq .Lcssa1_exception_eexit_aexnotify_finaljmp(%rip), %r11
cmpq %r11, SGX_GPR_RIP(%rbx)
je 1f

# not at the stage-1 handler's final jmp instruction, use SSA0's RIP directly
movq SGX_GPR_RIP(%rbx), %r11
jmp 2f

1:
# at the stage-1 handler's final jmp instruction, skip over this jmp instruction by directly
# jumping to the previously-saved SSA0's RIP (which was saved in R11); i.e. the interrupted
# context was about to execute `jmp *%r11`, so its R11 already holds the real destination
movq SGX_GPR_R11(%rbx), %r11

2:
# Clear bit 0 within SSA[0].GPRSGX.AEXNOTIFY (so that ERESUME actually resumes stage-2
# handler if it was interrupted by yet another AEX). The stage-2 handler, _PalExceptionHandler()
# func, will re-instate this bit before applying mitigations.
# Note that the store below zeroes the whole AEXNOTIFY byte, not only bit 0.
movb $0, SGX_GPR_AEXNOTIFY(%rbx)

# restore context from SSA[0] (which was already modified above to jump to stage-2 C handler);
# note that XSAVE area (xregs) was already restored above, so only need to restore GPRs;
# note that we don't care about SSA[0].GPRSGX.{URSP,URBP,EXITINFO,RESERVED,GSBASE}

# RSP is temporarily pointed at the RFLAGS slot inside SSA[0] so that POPFQ loads it; the real
# RSP is loaded from SSA[0].GPRSGX.RSP at the end of the restore sequence below
leaq SGX_GPR_RFLAGS(%rbx), %rsp # trick to restore RFLAGS directly from SSA[0].GPRSGX.RFLAGS
popfq

# RDI is only a scratch register here; its SSA[0] value is loaded right after WRFSBASE
movq SGX_GPR_FSBASE(%rbx), %rdi
.byte 0xf3, 0x48, 0x0f, 0xae, 0xd7 # WRFSBASE %RDI

movq SGX_GPR_RDI(%rbx), %rdi # 1st arg to _PalExceptionHandler()
movq SGX_GPR_RSI(%rbx), %rsi # 2nd arg to _PalExceptionHandler()
movq SGX_GPR_RDX(%rbx), %rdx # 3rd arg to _PalExceptionHandler()
movq SGX_GPR_RCX(%rbx), %rcx # 4th arg to _PalExceptionHandler()
movq SGX_GPR_R8(%rbx), %r8 # 5th arg to _PalExceptionHandler()
movq SGX_GPR_R9(%rbx), %r9 # not strictly needed
movq SGX_GPR_R10(%rbx), %r10 # not strictly needed
movq SGX_GPR_R12(%rbx), %r12 # not strictly needed
movq SGX_GPR_R13(%rbx), %r13 # not strictly needed
movq SGX_GPR_R14(%rbx), %r14 # not strictly needed
movq SGX_GPR_R15(%rbx), %r15 # not strictly needed
movq SGX_GPR_RBP(%rbx), %rbp # not strictly needed
movq SGX_GPR_RSP(%rbx), %rsp
# RBX was the last register still pointing into the SSA; must be cleared only after all
# SGX_GPR_*(%rbx) loads above are done.
# NOTE(review): XOR rewrites the arithmetic status flags (CF/PF/ZF/SF/OF) that POPFQ loaded
# above; the other RFLAGS bits (e.g. DF) are unaffected. Presumably harmless because the jump
# target is a function entry point -- confirm.
xorq %rbx, %rbx # for sanity

# go from SSA[1] to SSA[0] (more specifically, simply decrement CSSA from 1 to 0);
# must be careful after this ENCLU instruction because may be interrupted by new exceptions
movq $EDECCSSA, %rax
enclu

# Finally jump to the stage-2 C exception handler. An async signal can arrive at this exact jmp
# instruction. At this point, SSA0 has the correct internally-consistent context for the
# "stage-2 exception handler", so in this corner case the stage-1 exception handler can skip
# over this jmp and rewire SSA0's RIP directly to _PalExceptionHandler(), which at this point is
# stored in SSA[0].GPRSGX.R11. See also code at .Lcssa1_exception_eexit_aexnotify.
.Lcssa1_exception_eexit_aexnotify_finaljmp:
jmp *%r11

.cfi_endproc


Expand Down
2 changes: 2 additions & 0 deletions pal/src/host/linux-sgx/generated_offsets.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ const struct generated_offset generated_offsets[] = {
OFFSET_T(SGX_GPR_RIP, sgx_pal_gpr_t, rip),
OFFSET_T(SGX_GPR_EXITINFO, sgx_pal_gpr_t, exitinfo),
OFFSET_T(SGX_GPR_AEXNOTIFY, sgx_pal_gpr_t, aexnotify),
OFFSET_T(SGX_GPR_FSBASE, sgx_pal_gpr_t, fsbase),
DEFINE(SGX_GPR_SIZE, sizeof(sgx_pal_gpr_t)),

/* sgx_cpu_context_t */
Expand Down Expand Up @@ -169,6 +170,7 @@ const struct generated_offset generated_offsets[] = {

/* pal.h */
DEFINE(PAL_EVENT_NO_EVENT, PAL_EVENT_NO_EVENT),
DEFINE(PAL_EVENT_INTERRUPTED, PAL_EVENT_INTERRUPTED),
DEFINE(PAL_EVENT_NUM_BOUND, PAL_EVENT_NUM_BOUND),

/* errno */
Expand Down
10 changes: 10 additions & 0 deletions pal/src/host/linux-sgx/host_entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,16 @@ async_exit_pointer:
movb $0, %gs:PAL_HOST_TCB_IN_AEX
.cfi_endproc

# In case of non-AEX-Notify flows, ERESUME never morphs into EENTER, so the value in RDI is
# ignored. Below code snippet becomes a no-op.
# In case of AEX-Notify flows, ERESUME will morph into EENTER. The only way we could arrive at
# this line of code is that there was no pending signal to handle inside the SGX enclave, i.e.,
# maybe_raise_pending_signal() didn't do anything. Since there was no real pending signal (but
# an AEX happened, so AEX-Notify must react to it), we put a dummy PAL_EVENT_INTERRUPTED (aka
# SIGCONT) into RDI. With this dummy signal, we pass all checks inside Gramine's in-enclave
# logic, because SIGCONT can always arrive and is benign and side-effect-free.
movq $PAL_EVENT_INTERRUPTED, %rdi

# fall-through to ERESUME

.global eresume_pointer
Expand Down
30 changes: 23 additions & 7 deletions pal/src/host/linux-sgx/pal_exception.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,16 @@
#define ADDR_IN_PAL(addr) ((void*)(addr) > TEXT_START && (void*)(addr) < TEXT_END)

bool g_aex_notify_enabled = false;
uint64_t g_aex_notify_counter = 0;

void init_aex_notify_for_thread(void) {
if (!g_aex_notify_enabled)
return;

SET_ENCLAVE_TCB(ready_for_aex_notify, 1UL);
MB();
#if 0
/*
* FIXME: Re-enable in the following commit, when all AEX-Notify flows are added.
* Currently this would fail, as the untrusted runtime expects AEX-Notify flows but
* in-enclave runtime doesn't yet implement AEX-Notify flows.
*/
GET_ENCLAVE_TCB(gpr)->aexnotify = 1U;
MB();
#endif
}

void fini_aex_notify_for_thread(void) {
Expand All @@ -51,6 +45,18 @@ void fini_aex_notify_for_thread(void) {
MB();
}

/*
 * Placeholder for the AEX-Notify mitigations that run in the stage-2 exception handler.
 *
 * No real mitigation is applied yet: the function only bumps the global AEX event counter
 * `g_aex_notify_counter` (relaxed atomic increment). Both arguments are currently ignored.
 *
 * TODO: introduce mitigations like atomic prefetching of the working set, see proposed
 *       mitigations in academic paper "AEX-Notify: Thwarting Precise Single-Stepping
 *       Attacks through Interrupt Awareness for Intel SGX Enclaves"
 */
static void apply_aex_notify_mitigations(sgx_cpu_context_t* uc, PAL_XREGS_STATE* xregs_state) {
    /* one more AEX event was reported to the enclave */
    __atomic_fetch_add(&g_aex_notify_counter, 1, __ATOMIC_RELAXED);

    /* not needed until real mitigations are implemented */
    __UNUSED(xregs_state);
    __UNUSED(uc);
}

/* Restore an sgx_cpu_context_t as generated by .Lhandle_exception. Execution will
* continue as specified by the rip in the context. */
__attribute_no_sanitize_address
Expand All @@ -65,6 +71,16 @@ noreturn static void restore_sgx_context(sgx_cpu_context_t* uc, PAL_XREGS_STATE*
asan_unpoison_current_stack(sig_stack_low, sig_stack_high - sig_stack_low);
#endif

if (g_aex_notify_enabled && GET_ENCLAVE_TCB(ready_for_aex_notify)) {
/*
* AEX-Notify must be re-enabled for this enclave thread before applying any mitigations
* (and consequently before restoring the regular execution of the enclave thread). For
* details, see e.g. the official whitepaper on AEX-Notify from Intel.
*/
GET_ENCLAVE_TCB(gpr)->aexnotify = 1;
apply_aex_notify_mitigations(uc, xregs_state);
}

_restore_sgx_context(uc, xregs_state);
}

Expand Down
1 change: 1 addition & 0 deletions pal/src/host/linux-sgx/pal_linux.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ void restore_xregs(const PAL_XREGS_STATE* xsave_area);
noreturn void _restore_sgx_context(sgx_cpu_context_t* uc, PAL_XREGS_STATE* xsave_area);

extern bool g_aex_notify_enabled;
extern uint64_t g_aex_notify_counter;
void init_aex_notify_for_thread(void);
void fini_aex_notify_for_thread(void);

Expand Down
10 changes: 10 additions & 0 deletions pal/src/host/linux-sgx/pal_process.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,16 @@ int init_child_process(int parent_stream_fd, PAL_HANDLE* out_parent_handle,
noreturn void _PalProcessExit(int exitcode) {
if (exitcode)
log_debug("PalProcessExit: Returning exit code %d", exitcode);

/*
* FIXME: remove this when proper AEX-Notify mitigations are implemented;
* see pal_exception.c:apply_aex_notify_mitigations()
*/
if (g_aex_notify_enabled) {
uint64_t aex_notify_counter = __atomic_load_n(&g_aex_notify_counter, __ATOMIC_RELAXED);
log_warning("AEX-Notify counter at process exit: %lu", aex_notify_counter);
}

ocall_exit(exitcode, /*is_exitgroup=*/true);
/* Unreachable. */
}
Expand Down

0 comments on commit 5a8651c

Please sign in to comment.