/* SPDX-License-Identifier: LGPL-3.0-or-later */
/* Copyright (C) 2014 Stony Brook University
* 2020 Intel Labs
* 2024 Intel Corporation
* Kailun Qin <[email protected]>
*/
/*
* This file contains APIs to set up signal handlers.
*/
#include <stddef.h> /* needed by <linux/signal.h> for size_t */
#include <linux/signal.h>
#include "api.h"
#include "asan.h"
#include "cpu.h"
#include "pal.h"
#include "pal_internal.h"
#include "pal_linux.h"
#include "pal_sgx.h"
#define ADDR_IN_PAL(addr) ((void*)(addr) > TEXT_START && (void*)(addr) < TEXT_END)
bool g_aex_notify_enabled = false;
uint64_t g_aex_notify_counter = 0;
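/* Enable AEX-Notify for the current thread: first mark the thread as ready from the SW
 * perspective (so that the stage-2 signal handler may re-enable AEX-Notify), then set the HW bit
 * in the SSA GPR area. The memory barriers enforce this ordering; see
 * fini_aex_notify_for_thread() for why the (reverse) teardown order matters. */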
void init_aex_notify_for_thread(void) {
if (!g_aex_notify_enabled)
return;
SET_ENCLAVE_TCB(ready_for_aex_notify, 1UL);
MB();
GET_ENCLAVE_TCB(gpr)->aexnotify = 1U;
MB();
}
void fini_aex_notify_for_thread(void) {
if (!g_aex_notify_enabled)
return;
/*
* Order is important: first the stage-2 signal handler must be informed to *not* re-enable
* AEX-Notify for this thread, then AEX-Notify must be disabled for this thread from the HW
* perspective (so that it doesn't morph ERESUME into EENTER), and finally AEX-Notify must be
* disabled from the SW perspective (so that it doesn't try EDECCSSA instead of EEXIT).
*
* Without `stopping_aex_notify`, a signal could arrive and force `restore_sgx_context()` to
* re-enable AEX-Notify, even if unsetting `aexnotify` was executed in the meantime.
*
* If `ready_for_aex_notify` were unset before unsetting `aexnotify`, then the HW could
* morph ERESUME into EENTER, and the flow enclave_entry.S:Lcssa1_exception_eexit would choose
* the no-AEX-Notify path and perform EEXIT, which is unsupported in Gramine (Gramine assumes
* that ERESUME never returns).
*/
SET_ENCLAVE_TCB(stopping_aex_notify, 1UL);
MB();
GET_ENCLAVE_TCB(gpr)->aexnotify = 0U;
MB();
SET_ENCLAVE_TCB(ready_for_aex_notify, 0UL);
MB();
}
static void apply_aex_notify_mitigations(sgx_cpu_context_t* uc, PAL_XREGS_STATE* xregs_state) {
/*
* TODO: introduce mitigations like atomic prefetching of the working set, see proposed
* mitigations in academic paper "AEX-Notify: Thwarting Precise Single-Stepping
* Attacks through Interrupt Awareness for Intel SGX Enclaves"
*/
__UNUSED(uc);
__UNUSED(xregs_state);
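/* for now, the only action taken on an AEX event is bumping a counter */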
__atomic_fetch_add(&g_aex_notify_counter, 1, __ATOMIC_RELAXED);
}
/* Restore an sgx_cpu_context_t as generated by .Lhandle_exception. Execution will
* continue as specified by the rip in the context. */
__attribute_no_sanitize_address
noreturn static void restore_sgx_context(sgx_cpu_context_t* uc, PAL_XREGS_STATE* xregs_state) {
if (xregs_state == NULL)
xregs_state = (PAL_XREGS_STATE*)g_xsave_reset_state;
#ifdef ASAN
/* Unpoison the signal stack before leaving it */
uintptr_t sig_stack_low = GET_ENCLAVE_TCB(sig_stack_low);
uintptr_t sig_stack_high = GET_ENCLAVE_TCB(sig_stack_high);
asan_unpoison_current_stack(sig_stack_low, sig_stack_high - sig_stack_low);
#endif
if (g_aex_notify_enabled && GET_ENCLAVE_TCB(ready_for_aex_notify)
&& !GET_ENCLAVE_TCB(stopping_aex_notify)) {
/*
* AEX-Notify must be re-enabled for this enclave thread before applying any mitigations
* (and consequently before restoring the regular execution of the enclave thread). For
* details, see e.g. the official whitepaper on AEX-Notify from Intel.
*/
GET_ENCLAVE_TCB(gpr)->aexnotify = 1;
apply_aex_notify_mitigations(uc, xregs_state);
}
_restore_sgx_context(uc, xregs_state);
}
noreturn static void restore_pal_context(sgx_cpu_context_t* uc, PAL_CONTEXT* ctx) {
uc->rax = ctx->rax;
uc->rbx = ctx->rbx;
uc->rcx = ctx->rcx;
uc->rdx = ctx->rdx;
uc->rsp = ctx->rsp;
uc->rbp = ctx->rbp;
uc->rsi = ctx->rsi;
uc->rdi = ctx->rdi;
uc->r8 = ctx->r8;
uc->r9 = ctx->r9;
uc->r10 = ctx->r10;
uc->r11 = ctx->r11;
uc->r12 = ctx->r12;
uc->r13 = ctx->r13;
uc->r14 = ctx->r14;
uc->r15 = ctx->r15;
uc->rflags = ctx->efl;
uc->rip = ctx->rip;
restore_sgx_context(uc, ctx->is_fpregs_used ? ctx->fpregs : NULL);
}
static void save_pal_context(PAL_CONTEXT* ctx, sgx_cpu_context_t* uc,
PAL_XREGS_STATE* xregs_state) {
memset(ctx, 0, sizeof(*ctx));
ctx->rax = uc->rax;
ctx->rbx = uc->rbx;
ctx->rcx = uc->rcx;
ctx->rdx = uc->rdx;
ctx->rsp = uc->rsp;
ctx->rbp = uc->rbp;
ctx->rsi = uc->rsi;
ctx->rdi = uc->rdi;
ctx->r8 = uc->r8;
ctx->r9 = uc->r9;
ctx->r10 = uc->r10;
ctx->r11 = uc->r11;
ctx->r12 = uc->r12;
ctx->r13 = uc->r13;
ctx->r14 = uc->r14;
ctx->r15 = uc->r15;
ctx->efl = uc->rflags;
ctx->rip = uc->rip;
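/* the enclave thread runs in ring 3 on a Linux x86-64 host, so report the standard Linux
 * userspace code/stack segment selectors */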
union pal_csgsfs csgsfs = {
.cs = 0x33, // __USER_CS(5) | 0(GDT) | 3(RPL)
.fs = 0,
.gs = 0,
.ss = 0x2b, // __USER_DS(6) | 0(GDT) | 3(RPL)
};
ctx->csgsfsss = csgsfs.csgsfs;
assert(xregs_state);
ctx->fpregs = xregs_state;
ctx->is_fpregs_used = 1;
/* Emulate the format for FP registers that Linux sets up in the signal frame.
* https://elixir.bootlin.com/linux/v5.4.13/source/arch/x86/kernel/fpu/signal.c#L86
* https://elixir.bootlin.com/linux/v5.4.13/source/arch/x86/kernel/fpu/signal.c#L459
*/
PAL_FPX_SW_BYTES* fpx_sw = &xregs_state->fpstate.sw_reserved;
fpx_sw->magic1 = PAL_FP_XSTATE_MAGIC1;
fpx_sw->extended_size = g_xsave_size;
fpx_sw->xfeatures = g_xsave_features;
memset(fpx_sw->padding, 0, sizeof(fpx_sw->padding));
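/* when XSAVE is enabled, Linux additionally places FP_XSTATE_MAGIC2 right past the xsave area
 * so that userspace can validate the extended state; replicate that trailer here */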
if (g_xsave_enabled) {
fpx_sw->xstate_size = g_xsave_size + PAL_FP_XSTATE_MAGIC2_SIZE;
*(__typeof__(PAL_FP_XSTATE_MAGIC2)*)((void*)xregs_state + g_xsave_size) =
PAL_FP_XSTATE_MAGIC2;
} else {
fpx_sw->xstate_size = g_xsave_size;
}
}
static void emulate_rdtsc_and_print_warning(sgx_cpu_context_t* uc) {
if (FIRST_TIME()) {
/* if we end up emulating RDTSC/RDTSCP instruction, we cannot use invariant TSC */
extern uint64_t g_tsc_hz;
g_tsc_hz = 0;
log_warning("all RDTSC/RDTSCP instructions are emulated (imprecisely) via gettime() "
"syscall.");
}
uint64_t usec;
int res = _PalSystemTimeQuery(&usec);
if (res < 0) {
log_error("_PalSystemTimeQuery() failed in unrecoverable context, exiting.");
_PalProcessExit(1);
}
/* FIXME: Ideally, we would like to scale microseconds back to RDTSC clock cycles */
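/* RDTSC returns its 64-bit result split across EDX:EAX, so mimic that split here */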
uc->rdx = (uint32_t)(usec >> 32);
uc->rax = (uint32_t)usec;
}
static void emulate_iret_and_print_warning(sgx_cpu_context_t* uc) {
#ifndef __x86_64__
#error "The iret emulation is unsupported on other platforms."
#endif
if (FIRST_TIME()) {
log_warning("Emulating a raw iret instruction. This degrades performance.");
}
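/* a 64-bit IRETQ pops, in this order: RIP, CS, RFLAGS, RSP, SS; walk the same stack frame
 * here, verifying in debug builds that CS and SS stay unchanged */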
uc->rip = *(uint64_t*)(intptr_t)uc->rsp;
uc->rsp += 8;
/* Assume that cs register doesn't change. */
#ifdef DEBUG
uint64_t cs = *(uint64_t*)(intptr_t)uc->rsp;
uint64_t cur_cs = 0;
__asm__ volatile (
"movq %%cs, %0\n"
: "=r"(cur_cs)
);
assert(cs == cur_cs);
#endif
uc->rsp += 8;
uc->rflags = *(uint64_t*)(intptr_t)uc->rsp;
uc->rsp += 8;
uint64_t tmprsp = *(uint64_t*)(intptr_t)uc->rsp;
uc->rsp += 8;
/* Assume that ss register doesn't change. */
#ifdef DEBUG
uint64_t ss = *(uint64_t*)(intptr_t)uc->rsp;
uint64_t cur_ss = 0;
__asm__ volatile (
"movq %%ss, %0\n"
: "=r"(cur_ss)
);
assert(ss == cur_ss);
#endif
uc->rsp += 8;
uc->rsp = tmprsp;
}
/* return value: true if #UD was handled and execution can be continued without propagating #UD;
* false if #UD was not handled and exception needs to be raised up to LibOS/app */
static bool handle_ud(sgx_cpu_context_t* uc, int* out_event_num) {
/* most unhandled #UD faults are translated and sent to LibOS/app as "Illegal instruction"
* exceptions; however some #UDs (e.g. triggered due to IN/OUT/INS/OUTS) must be translated as
* "Memory fault" exceptions */
*out_event_num = PAL_EVENT_ILLEGAL;
uint8_t* instr = (uint8_t*)uc->rip;
if (instr[0] == 0x0f && instr[1] == 0xa2) {
/* cpuid */
unsigned int values[4];
if (!_PalCpuIdRetrieve(uc->rax & 0xffffffff, uc->rcx & 0xffffffff, values)) {
uc->rip += 2;
uc->rax = values[0];
uc->rbx = values[1];
uc->rcx = values[2];
uc->rdx = values[3];
return true;
}
} else if (instr[0] == 0x0f && instr[1] == 0x31) {
/* rdtsc */
emulate_rdtsc_and_print_warning(uc);
uc->rip += 2;
return true;
} else if (instr[0] == 0x0f && instr[1] == 0x01 && instr[2] == 0xf9) {
/* rdtscp */
emulate_rdtsc_and_print_warning(uc);
uc->rip += 3;
uc->rcx = 0; /* dummy IA32_TSC_AUX; Linux encodes it as (numa_id << 12) | cpu_id */
return true;
} else if (0x48 <= instr[0] && instr[0] <= 0x4F && instr[1] == 0xcf) {
/*
* The IRETQ (interrupt return, 64-bit operand size) is prefixed with REX.W (bit 3).
* From Intel manual:
* REX prefixes are a set of 16 opcodes that span one row of the opcode map and occupy
* entries 40H to 4FH.
*/
emulate_iret_and_print_warning(uc);
return true;
} else if (instr[0] == 0xf3 && (instr[1] & ~1) == 0x48 && instr[2] == 0x0f &&
instr[3] == 0xae && instr[4] >> 6 == 0b11 && ((instr[4] >> 3) & 0b111) < 4) {
/* A disabled {RD,WR}{FS,GS}BASE instruction generated a #UD */
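/* (encoding: F3 <REX.W> 0F AE with ModRM.mod = 0b11 and ModRM.reg in 0..3, i.e.
 * RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE) */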
log_error(
"{RD,WR}{FS,GS}BASE instructions are not permitted on this platform. Please check the "
"instructions under \"Building with SGX support\" from Gramine documentation.");
return false;
} else if (instr[0] == 0x0f && instr[1] == 0x05) {
/* syscall: LibOS may know how to handle this */
if (FIRST_TIME()) {
log_always("Emulating a raw syscall instruction. This degrades performance, consider"
" patching your application to use Gramine syscall API.");
}
char buf[LOCATION_BUF_SIZE];
pal_describe_location(uc->rip, buf, sizeof(buf));
log_trace("Emulating raw syscall instruction with number %lu at address %s", uc->rax, buf);
return false;
} else if (is_in_out(instr) && !has_lock_prefix(instr)) {
/*
* Executing I/O instructions (e.g., IN/OUT/INS/OUTS) inside an SGX enclave generates a #UD
* fault. Without the below corner-case handling, PAL would propagate this fault to LibOS as
* an "Illegal instruction" Gramine exception. However, I/O instructions result in a #GP
* fault outside SGX (which corresponds to "Memory fault" Gramine exception) if I/O is not
* permitted (which is true in userspace apps). Let PAL emulate these instructions as if
* they ended up in a memory fault.
*
* Note that I/O instructions with a LOCK prefix result in a #UD fault also outside SGX, so
* they are excluded from this emulation and stay an "Illegal instruction" event.
*/
if (FIRST_TIME()) {
log_warning("Emulating In/OUT/INS/OUTS instruction as a SIGSEGV signal to app.");
}
*out_event_num = PAL_EVENT_MEMFAULT;
return false;
}
char buf[LOCATION_BUF_SIZE];
pal_describe_location(uc->rip, buf, sizeof(buf));
log_warning("Unknown or illegal instruction executed at %s", buf);
return false;
}
/* TODO: remove this once the Linux kernel is patched. */
#ifndef LINUX_KERNEL_SGX_EDMM_DATA_RACES_PATCHED
static bool is_eaccept_instr(sgx_cpu_context_t* uc) {
/* instruction must be ENCLU and leaf must be EACCEPT */
uint8_t* instr = (uint8_t*)uc->rip;
if (instr[0] == 0x0f && instr[1] == 0x01 && instr[2] == 0xd7 && uc->rax == EACCEPT)
return true;
return false;
}
#endif
/* perform exception handling inside the enclave */
void _PalExceptionHandler(uint32_t trusted_exit_info_,
uint32_t untrusted_external_event, sgx_cpu_context_t* uc,
PAL_XREGS_STATE* xregs_state, sgx_arch_exinfo_t* exinfo) {
assert(IS_ALIGNED_PTR(xregs_state, PAL_XSTATE_ALIGN));
sgx_arch_exit_info_t trusted_exit_info;
static_assert(sizeof(trusted_exit_info) == sizeof(trusted_exit_info_), "invalid size");
memcpy(&trusted_exit_info, &trusted_exit_info_, sizeof(trusted_exit_info));
/*
* Intel SGX hardware exposes information on a HW exception in the EXITINFO struct.
* Host OS + Gramine's untrusted part of PAL deliver a SW signal. The SW signal can be a
* reaction to HW exception (synchronous signal) or a reaction to software events (asynchronous
* signal). For security, it is important to cross-check HW exception state vs SW signal state.
*
* The below table shows the cross checks. "yes" means allowed combination, "no" means
* prohibited combination (Gramine terminates). "yes*" means a special case of #PF, see comments
* below on #PF handling.
*
* +-----------------------------+-----+-----+-----+-----+------------------+------------+
* | HW exceptions (trusted) -> | | #DE | | | | |
* | --------------------------- | | #MF | | #GP | others | none |
* | SW signals (untrusted) | | #UD | #XM | #PF | #AC | (#BR,#DB,#BP,#CP)| (valid=0) |
* | v | | | | | | |
* --+-----------------------------+-----+-----+-----+-----+------------------+------------+
* s | | | | | | | |
* y | PAL_EVENT_ILLEGAL | yes | no | no | no | | |
* n | | | | | | | |
* c +-----------------------------+-----+-----+-----+-----+ no | no |
* h | | | | | | (exceptions | (malicious |
* r | PAL_EVENT_ARITHMETIC_ERROR | no | yes | no | no | unsupported | host |
* o | | | | | | by Gramine) | injected |
* n +-----------------------------+-----+-----+-----+-----+ | SW signal)|
* o | | | | | | | |
* u | PAL_EVENT_MEMFAULT | no | no |yes* | yes | | |
* s | | | | | | | |
* --+-----------------------------+-----+-----+-----+-----+------------------+------------+
* | | | |
* a | PAL_EVENT_QUIT | | yes |
* s | | no, except #PF case* | |
* y +-----------------------------+ (malicious host ignored HW exception) +------------+
* n | | | |
* c | PAL_EVENT_INTERRUPTED | | yes |
* | | | |
* --+-----------------------------+------------------------------------------+------------+
*/
bool is_synthetic_gp = false; /* IN/OUT/INS/OUTS instructions morph #UD into a synthetic #GP */
uint32_t event_num = 0; /* illegal event */
if (!trusted_exit_info.valid) {
/* corresponds to last column in the table above */
if (untrusted_external_event != PAL_EVENT_QUIT
&& untrusted_external_event != PAL_EVENT_INTERRUPTED) {
if (untrusted_external_event == PAL_EVENT_MEMFAULT
&& g_pal_linuxsgx_state.memfaults_without_exinfo_allowed) {
/*
* NOTE: Old CPUs may have SGX without the EXINFO feature, thus they do not
* report/reflect #PF and #GP exceptions in the trusted EXITINFO struct. In some
* situations (debugging using older CPUs) we don't want to terminate immediately.
* Instead we propagate this reported-by-host and possibly malicious exception to
* the app, with MADDR (faulting addr) and ERRCD (error code) set to zeros.
*
* This is enabled via an (insecure) manifest option and will be removed in the near
* future.
*/
memset(&trusted_exit_info, 0, sizeof(trusted_exit_info));
} else {
log_error("Host injected malicious signal %u", untrusted_external_event);
_PalProcessExit(1);
}
}
event_num = untrusted_external_event;
} else {
/* corresponds to all but last columns in the table above */
const char* exception_name = NULL;
switch (trusted_exit_info.vector) {
case SGX_EXCEPTION_VECTOR_UD:
if (untrusted_external_event != PAL_EVENT_ILLEGAL) {
log_error("Host reported mismatching signal (expected %u, got %u)",
PAL_EVENT_ILLEGAL, untrusted_external_event);
_PalProcessExit(1);
}
int event_num_from_handle_ud;
if (handle_ud(uc, &event_num_from_handle_ud)) {
restore_sgx_context(uc, xregs_state);
/* UNREACHABLE */
}
assert(event_num_from_handle_ud == PAL_EVENT_ILLEGAL
|| event_num_from_handle_ud == PAL_EVENT_MEMFAULT);
if (event_num_from_handle_ud == PAL_EVENT_MEMFAULT) {
/* it's a #UD on IN/OUT/INS/OUTS instructions, morphed into a #GP in handle_ud()
* logic: adjust exception info sent to LibOS to mimic a #GP (see code below) */
is_synthetic_gp = true;
}
event_num = event_num_from_handle_ud;
break;
case SGX_EXCEPTION_VECTOR_DE:
case SGX_EXCEPTION_VECTOR_MF:
case SGX_EXCEPTION_VECTOR_XM:
if (untrusted_external_event != PAL_EVENT_ARITHMETIC_ERROR) {
log_error("Host reported mismatching signal (expected %u, got %u)",
PAL_EVENT_ARITHMETIC_ERROR, untrusted_external_event);
_PalProcessExit(1);
}
event_num = PAL_EVENT_ARITHMETIC_ERROR;
break;
case SGX_EXCEPTION_VECTOR_PF:
if (untrusted_external_event == PAL_EVENT_QUIT
|| untrusted_external_event == PAL_EVENT_INTERRUPTED) {
/*
* The host delivered an asynchronous signal, so the reported-by-SGX #PF must be
* benign (resolved completely by the host kernel), otherwise the host would
* deliver PAL_EVENT_MEMFAULT (to signify a #PF which should be acted upon by
* Gramine).
*
* The SGX hardware always reports such benign #PFs, even though they can be
* considered spurious and should be ignored. Thus the event must be a
* host-induced external event; in the following we handle this external
* event and ignore the #PF info.
*
* Note that the host could modify a real memory fault (a valid #PF) to e.g. a
* PAL_EVENT_INTERRUPTED signal. Then we end up in this special case and the app
* will not handle a real memory fault but a dummy PAL_EVENT_INTERRUPTED. This
* will lead to the app getting stuck on #PF. Since this is a DoS, and Intel SGX
* and Gramine don't care about DoSes, this special case is benign.
*/
memset(&trusted_exit_info, 0, sizeof(trusted_exit_info));
event_num = untrusted_external_event;
break;
}
/* fallthrough */
case SGX_EXCEPTION_VECTOR_GP:
case SGX_EXCEPTION_VECTOR_AC:
if (untrusted_external_event != PAL_EVENT_MEMFAULT) {
log_error("Host reported mismatching signal (expected %u, got %u)",
PAL_EVENT_MEMFAULT, untrusted_external_event);
_PalProcessExit(1);
}
event_num = PAL_EVENT_MEMFAULT;
break;
case SGX_EXCEPTION_VECTOR_BR:
exception_name = exception_name ? : "#BR";
/* fallthrough */
case SGX_EXCEPTION_VECTOR_DB:
exception_name = exception_name ? : "#DB";
/* fallthrough */
case SGX_EXCEPTION_VECTOR_BP:
exception_name = exception_name ? : "#BP";
/* fallthrough */
case SGX_EXCEPTION_VECTOR_CP:
exception_name = exception_name ? : "#CP";
/* fallthrough */
default:
log_error("Handling %s exceptions is currently unsupported by Gramine",
exception_name ? : "[unknown]");
_PalProcessExit(1);
/* UNREACHABLE */
}
}
if (event_num == 0 || event_num >= PAL_EVENT_NUM_BOUND) {
log_error("Illegal exception reported: %d", event_num);
_PalProcessExit(1);
}
bool async_event = event_num == PAL_EVENT_QUIT || event_num == PAL_EVENT_INTERRUPTED;
bool memfault_with_edmm = !is_synthetic_gp && event_num == PAL_EVENT_MEMFAULT &&
g_pal_linuxsgx_state.edmm_enabled;
/* in PAL, and the event is neither asynchronous (i.e., it is a synchronous exception) nor a
* memory fault with EDMM enabled (the latter can happen legitimately when some syscalls access
* user buffers created via `MAP_NORESERVE` mappings; such faults are handled later by the
* lazy-allocation logic) */
if (ADDR_IN_PAL(uc->rip) && !async_event && !memfault_with_edmm) {
char buf[LOCATION_BUF_SIZE];
pal_describe_location(uc->rip, buf, sizeof(buf));
const char* event_name = pal_event_name(event_num);
log_error("Unexpected %s occurred inside PAL (%s)", event_name, buf);
if (trusted_exit_info.valid) {
/* EXITINFO field: vector = exception number, exit_type = 0x3 for HW / 0x6 for SW */
log_debug("(SGX HW reported AEX vector 0x%x with exit_type = 0x%x)",
trusted_exit_info.vector, trusted_exit_info.exit_type);
} else {
log_debug("(untrusted PAL sent PAL event 0x%x)", untrusted_external_event);
}
_PalProcessExit(1);
}
PAL_CONTEXT ctx = { 0 };
save_pal_context(&ctx, uc, xregs_state);
bool has_hw_fault_address = false;
if (trusted_exit_info.valid) {
ctx.trapno = trusted_exit_info.vector;
/* Only these two exceptions save information in EXINFO. */
if (!is_synthetic_gp && (trusted_exit_info.vector == SGX_EXCEPTION_VECTOR_GP
|| trusted_exit_info.vector == SGX_EXCEPTION_VECTOR_PF)) {
ctx.err = exinfo->error_code_val; /* bits: Present, Write/Read, User/Kernel, etc. */
ctx.cr2 = exinfo->maddr; /* NOTE: on #GP, maddr = 0 */
has_hw_fault_address = true;
}
}
uintptr_t addr = 0;
switch (event_num) {
case PAL_EVENT_ILLEGAL:
addr = uc->rip;
break;
case PAL_EVENT_MEMFAULT:
if (!has_hw_fault_address && !is_synthetic_gp
&& !g_pal_linuxsgx_state.memfaults_without_exinfo_allowed) {
log_error("Tried to handle a memory fault with no faulting address reported by "
"SGX. Please consider enabling 'sgx.use_exinfo' in the manifest.");
_PalProcessExit(1);
}
addr = ctx.cr2;
break;
default:
break;
}
if (memfault_with_edmm) {
/* EDMM lazy allocation */
assert(g_mem_bkeep_get_vma_info_upcall);
assert(ctx.err);
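/* ctx.err holds the x86 #PF error code bits reported via EXINFO: ERRCD_P (page present),
 * ERRCD_W (write access), ERRCD_I (instruction fetch), ERRCD_PK (protection-key violation),
 * ERRCD_SS (shadow-stack access) */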
#ifndef LINUX_KERNEL_SGX_EDMM_DATA_RACES_PATCHED
if (!(ctx.err & ERRCD_P) && is_eaccept_instr(uc)) {
/*
* Corner case of a #PF on a non-present page during EACCEPT: this is a benign spurious
* #PF that will be resolved completely by the host kernel.
*
* This is due to a data race in the SGX driver where two enclave threads may try to
* access the same non-present enclave page simultaneously, see below for details:
* https://lore.kernel.org/lkml/[email protected].
*
* TODO: remove this workaround once the Linux kernel is patched.
*/
goto out;
}
#endif
pal_prot_flags_t prot_flags;
if (g_mem_bkeep_get_vma_info_upcall(addr, &prot_flags) == 0) {
prot_flags &= ~PAL_PROT_LAZYALLOC;
if (((ctx.err & ERRCD_W) && !(prot_flags & PAL_PROT_WRITE)) ||
((ctx.err & ERRCD_I) && !(prot_flags & PAL_PROT_EXEC)) ||
/* This checks insufficient read access, e.g., reading a `PROT_NONE` page or
* eXecute-Only Memory (XOM, specified with `PROT_EXEC` alone). Note that on Linux,
* `PROT_READ` is not required to be set when `PROT_WRITE` or `PROT_EXEC` are set.
* Since we're in the SGX EDMM PAL, a memfault is propagated when reading XOM. */
(!(ctx.err & ERRCD_W) && !(ctx.err & ERRCD_I) && !(prot_flags & PAL_PROT_READ)) ||
(ctx.err & ERRCD_PK) || (ctx.err & ERRCD_SS)) {
/* the memfault can be caused by e.g. insufficient access rights rather than page
* not existing, which should be propagated in this case */
goto propagate_memfault;
}
/* The page's set/unset status will be double-checked against the status recorded in the
* enclave page tracker, and if it has already been committed, the page will be skipped.
* See `walk_pages()` in "pal/src/host/linux-sgx/enclave_edmm.c" for details.
*
* This avoids a potential security issue where a malicious host could trick us into
* committing the page twice (which would effectively allow the host to replace a
* lazily-allocated page with 0s) by removing the page and forcing a page fault. */
int ret = commit_lazy_alloc_pages(ALLOC_ALIGN_DOWN_PTR(addr), /*count=*/1, prot_flags);
if (ret < 0) {
log_error("failed to lazily allocate page at 0x%lx: %s", addr, pal_strerror(ret));
_PalProcessExit(1);
}
goto out;
} else if (ADDR_IN_PAL(uc->rip)) {
/* inside PAL, and we failed to get the VMA info of the faulting address or we hit a
* memfault on a not lazily-allocated page */
char buf[LOCATION_BUF_SIZE];
pal_describe_location(uc->rip, buf, sizeof(buf));
log_error("Unexpected memory fault occurred inside PAL (%s)", buf);
_PalProcessExit(1);
}
/* propagate the unhandled memfaults to LibOS via upcall */
}
propagate_memfault:;
pal_event_handler_t upcall = _PalGetExceptionHandler(event_num);
if (upcall) {
(*upcall)(ADDR_IN_PAL(uc->rip), addr, &ctx);
}
out:
restore_pal_context(uc, &ctx);
}