Skip to content

Commit

Permalink
x86_64: Fix "bt" command on kernels with random_kstack_offset=on
Browse files Browse the repository at this point in the history
On kernels configured with CONFIG_RANDOMIZE_KSTACK_OFFSET=y and
random_kstack_offset=on, a random offset is added to task stacks with
__kstack_alloca() at the beginning of do_syscall_64() and other syscall
entry functions.  This eventually executes the following instruction:

  <do_syscall_64+32>:  sub    %rax,%rsp

On the other hand, crash uses only a part of the ORC unwinder data to
unwind stacks, and if an ip value doesn't have usable ORC data, it
calculates the frame size by parsing the assembly of the function.

However, crash cannot calculate the frame size correctly with the
instruction above, and prints stale return addresses like this:

  crash> bt 1
  PID: 1        TASK: ffff9c250023b880  CPU: 0    COMMAND: "systemd"
    #0 [ffffb7e5c001fc80] __schedule at ffffffff91ae2b16
    #1 [ffffb7e5c001fd00] schedule at ffffffff91ae2ed3
    #2 [ffffb7e5c001fd18] schedule_hrtimeout_range_clock at ffffffff91ae7ed8
    #3 [ffffb7e5c001fda8] ep_poll at ffffffff913ef828
    #4 [ffffb7e5c001fe48] do_epoll_wait at ffffffff913ef943
    #5 [ffffb7e5c001fe80] __x64_sys_epoll_wait at ffffffff913f0130
    #6 [ffffb7e5c001fed0] do_syscall_64 at ffffffff91ad7169
    #7 [ffffb7e5c001fef0] do_syscall_64 at ffffffff91ad7179             <<
    #8 [ffffb7e5c001ff10] syscall_exit_to_user_mode at ffffffff91adaab2 << stale entries
    #9 [ffffb7e5c001ff20] do_syscall_64 at ffffffff91ad7179             <<
   #10 [ffffb7e5c001ff50] entry_SYSCALL_64_after_hwframe at ffffffff91c0009b
       RIP: 00007f258d9427ae  RSP: 00007fffda631d60  RFLAGS: 00000293
       ...

To fix this, enhance the use of ORC data.  The ORC unwinder often uses
the %rbp value, so retain it from exception frames and inactive task stacks.

Signed-off-by: Kazuhito Hagio <[email protected]>
  • Loading branch information
k-hagio committed Feb 27, 2023
1 parent 59c1981 commit daa43fa
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 35 deletions.
1 change: 1 addition & 0 deletions defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -2207,6 +2207,7 @@ struct offset_table { /* stash of commonly-used offsets */
long sock_sk_common;
long sock_common_skc_v6_daddr;
long sock_common_skc_v6_rcv_saddr;
long inactive_task_frame_bp;
};

struct size_table { /* stash of commonly-used sizes */
Expand Down
1 change: 1 addition & 0 deletions symbols.c
Original file line number Diff line number Diff line change
Expand Up @@ -8822,6 +8822,7 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(task_struct_tss_ksp));
fprintf(fp, " task_struct_thread_eip: %ld\n",
OFFSET(task_struct_thread_eip));
fprintf(fp, " inactive_task_frame_bp: %ld\n", OFFSET(inactive_task_frame_bp));
fprintf(fp, " inactive_task_frame_ret_addr: %ld\n",
OFFSET(inactive_task_frame_ret_addr));
fprintf(fp, " task_struct_thread_esp: %ld\n",
Expand Down
118 changes: 83 additions & 35 deletions x86_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ static int x86_64_do_not_cache_framesize(struct syment *, ulong);
static int x86_64_framesize_cache_func(int, ulong, int *, int, struct syment *);
static ulong x86_64_get_framepointer(struct bt_info *, ulong);
int search_for_eframe_target_caller(struct bt_info *, ulong, int *);
static int x86_64_get_framesize(struct bt_info *, ulong, ulong);
static int x86_64_get_framesize(struct bt_info *, ulong, ulong, char *);
static void x86_64_framesize_debug(struct bt_info *);
static void x86_64_get_active_set(void);
static int x86_64_get_kvaddr_ranges(struct vaddr_range *);
Expand Down Expand Up @@ -3642,7 +3642,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
bt, ofp);
rsp += SIZE(pt_regs); /* guaranteed kernel mode */
if (bt->eframe_ip && ((framesize = x86_64_get_framesize(bt,
bt->eframe_ip, rsp)) >= 0))
bt->eframe_ip, rsp, NULL)) >= 0))
rsp += framesize;
level++;
irq_eframe = 0;
Expand Down Expand Up @@ -3674,7 +3674,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
case BACKTRACE_ENTRY_DISPLAYED:
level++;
if ((framesize = x86_64_get_framesize(bt,
bt->eframe_ip ? bt->eframe_ip : *up, rsp)) >= 0) {
bt->eframe_ip ? bt->eframe_ip : *up, rsp, NULL)) >= 0) {
rsp += framesize;
i += framesize/sizeof(ulong);
}
Expand Down Expand Up @@ -3747,7 +3747,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
}

level++;
if ((framesize = x86_64_get_framesize(bt, bt->instptr, rsp)) >= 0)
if ((framesize = x86_64_get_framesize(bt, bt->instptr, rsp, NULL)) >= 0)
rsp += framesize;
}
}
Expand Down Expand Up @@ -3799,7 +3799,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
case BACKTRACE_ENTRY_DISPLAYED:
level++;
if ((framesize = x86_64_get_framesize(bt,
bt->eframe_ip ? bt->eframe_ip : *up, rsp)) >= 0) {
bt->eframe_ip ? bt->eframe_ip : *up, rsp, NULL)) >= 0) {
rsp += framesize;
i += framesize/sizeof(ulong);
}
Expand Down Expand Up @@ -3909,24 +3909,34 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
(STREQ(rip_symbol, "thread_return") ||
STREQ(rip_symbol, "schedule") ||
STREQ(rip_symbol, "__schedule"))) {
if (STREQ(rip_symbol, "__schedule")) {
i = (rsp - bt->stackbase)/sizeof(ulong);
x86_64_print_stack_entry(bt, ofp, level,
i, bt->instptr);
level++;
rsp = __schedule_frame_adjust(rsp, bt);
if (STREQ(closest_symbol(bt->instptr), "schedule"))
if ((machdep->flags & ORC) && VALID_MEMBER(inactive_task_frame_ret_addr)) {
/*
* %rsp should have the address of inactive_task_frame, so
* skip the registers before ret_addr to adjust rsp.
*/
if (CRASHDEBUG(1))
fprintf(fp, "rsp: %lx rbp: %lx\n", rsp, bt->bptr);
rsp += OFFSET(inactive_task_frame_ret_addr);
} else {
if (STREQ(rip_symbol, "__schedule")) {
i = (rsp - bt->stackbase)/sizeof(ulong);
x86_64_print_stack_entry(bt, ofp, level,
i, bt->instptr);
level++;
rsp = __schedule_frame_adjust(rsp, bt);
if (STREQ(closest_symbol(bt->instptr), "schedule"))
bt->flags |= BT_SCHEDULE;
} else
bt->flags |= BT_SCHEDULE;
} else
bt->flags |= BT_SCHEDULE;

if (bt->flags & BT_SCHEDULE) {
i = (rsp - bt->stackbase)/sizeof(ulong);
x86_64_print_stack_entry(bt, ofp, level,
i, bt->instptr);
bt->flags &= ~(ulonglong)BT_SCHEDULE;
rsp += sizeof(ulong);
level++;

if (bt->flags & BT_SCHEDULE) {
i = (rsp - bt->stackbase)/sizeof(ulong);
x86_64_print_stack_entry(bt, ofp, level,
i, bt->instptr);
bt->flags &= ~(ulonglong)BT_SCHEDULE;
rsp += sizeof(ulong);
level++;
}
}
}

Expand Down Expand Up @@ -3957,7 +3967,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
irq_eframe = 0;
bt->flags |= BT_EFRAME_TARGET;
if (bt->eframe_ip && ((framesize = x86_64_get_framesize(bt,
bt->eframe_ip, rsp)) >= 0))
bt->eframe_ip, rsp, NULL)) >= 0))
rsp += framesize;
bt->flags &= ~BT_EFRAME_TARGET;
}
Expand Down Expand Up @@ -4044,7 +4054,7 @@ x86_64_low_budget_back_trace_cmd(struct bt_info *bt_in)
case BACKTRACE_ENTRY_DISPLAYED:
level++;
if ((framesize = x86_64_get_framesize(bt,
bt->eframe_ip ? bt->eframe_ip : *up, rsp)) >= 0) {
bt->eframe_ip ? bt->eframe_ip : *up, rsp, (char *)up)) >= 0) {
rsp += framesize;
i += framesize/sizeof(ulong);
}
Expand Down Expand Up @@ -4755,7 +4765,8 @@ x86_64_exception_frame(ulong flags, ulong kvaddr, char *local,
bt->instptr = rip;
bt->stkptr = rsp;
bt->bptr = rbp;
}
} else if (machdep->flags & ORC)
bt->bptr = rbp;

if (kvaddr)
FREEBUF(pt_regs_buf);
Expand Down Expand Up @@ -5315,6 +5326,10 @@ x86_64_get_sp(struct bt_info *bt)
OFFSET(thread_struct_rsp), KVADDR,
&rsp, sizeof(void *),
"thread_struct rsp", FAULT_ON_ERROR);
if ((machdep->flags & ORC) && VALID_MEMBER(inactive_task_frame_bp)) {
readmem(rsp + OFFSET(inactive_task_frame_bp), KVADDR, &bt->bptr,
sizeof(void *), "inactive_task_frame.bp", FAULT_ON_ERROR);
}
return rsp;
}

Expand Down Expand Up @@ -6421,6 +6436,9 @@ x86_64_ORC_init(void)
orc->__stop_orc_unwind = symbol_value("__stop_orc_unwind");
orc->orc_lookup = symbol_value("orc_lookup");

MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame", "bp");
MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr, "inactive_task_frame", "ret_addr");

machdep->flags |= ORC;
}

Expand Down Expand Up @@ -8489,7 +8507,7 @@ search_for_eframe_target_caller(struct bt_info *bt, ulong stkptr, int *framesize
(BT_OLD_BACK_TRACE|BT_TEXT_SYMBOLS|BT_TEXT_SYMBOLS_ALL|BT_FRAMESIZE_DISABLE)

static int
x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp)
x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_ptr)
{
int c, framesize, instr, arg, max;
struct syment *sp;
Expand Down Expand Up @@ -8590,19 +8608,49 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp)
if ((machdep->flags & ORC) && (korc = orc_find(textaddr))) {
if (CRASHDEBUG(1)) {
fprintf(fp,
"rsp: %lx textaddr: %lx framesize: %d -> spo: %d bpo: %d spr: %d bpr: %d type: %d %s",
rsp, textaddr, framesize, korc->sp_offset, korc->bp_offset,
korc->sp_reg, korc->bp_reg, korc->type,
(korc->type == ORC_TYPE_CALL) && (korc->sp_reg == ORC_REG_SP) ? "" : "(UNUSED)");
"rsp: %lx textaddr: %lx -> spo: %d bpo: %d spr: %d bpr: %d type: %d",
rsp, textaddr, korc->sp_offset, korc->bp_offset,
korc->sp_reg, korc->bp_reg, korc->type);
if (MEMBER_EXISTS("orc_entry", "end"))
fprintf(fp, " end: %d", korc->end);
fprintf(fp, "\n");
}

if ((korc->type == ORC_TYPE_CALL) && (korc->sp_reg == ORC_REG_SP)) {
framesize = (korc->sp_offset - 8);
return (x86_64_framesize_cache_func(FRAMESIZE_ENTER, textaddr,
&framesize, exception, NULL));
if (korc->type == ORC_TYPE_CALL) {
ulong prev_sp = 0, prev_bp = 0;
framesize = -1;

if (korc->sp_reg == ORC_REG_SP) {
framesize = (korc->sp_offset - 8);

/* rsp points to a return address, so +8 to use sp_offset */
prev_sp = (rsp + 8) + korc->sp_offset;
if (CRASHDEBUG(1))
fprintf(fp, "rsp: %lx prev_sp: %lx framesize: %d\n",
rsp, prev_sp, framesize);
} else if ((korc->sp_reg == ORC_REG_BP) && bt->bptr) {
prev_sp = bt->bptr + korc->sp_offset;
framesize = (prev_sp - (rsp + 8) - 8);
if (CRASHDEBUG(1))
fprintf(fp, "rsp: %lx rbp: %lx prev_sp: %lx framesize: %d\n",
rsp, bt->bptr, prev_sp, framesize);
}

if ((korc->bp_reg == ORC_REG_PREV_SP) && prev_sp) {
prev_bp = prev_sp + korc->bp_offset;
if (stack_ptr && INSTACK(prev_bp, bt)) {
bt->bptr = ULONG(stack_ptr + (prev_bp - rsp));
if (CRASHDEBUG(1))
fprintf(fp, "rsp: %lx prev_sp: %lx prev_bp: %lx -> %lx\n",
rsp, prev_sp, prev_bp, bt->bptr);
} else
bt->bptr = 0;
} else if ((korc->bp_reg != ORC_REG_UNDEFINED))
bt->bptr = 0;

if (framesize >= 0)
/* Do not cache this, possibly it may be variable. */
return framesize;
}
}

Expand Down Expand Up @@ -8758,7 +8806,7 @@ x86_64_framesize_debug(struct bt_info *bt)
if (!bt->hp->eip)
error(INFO, "x86_64_framesize_debug: ignoring command\n");
else
x86_64_get_framesize(bt, bt->hp->eip, 0);
x86_64_get_framesize(bt, bt->hp->eip, 0, NULL);
break;

case -3:
Expand Down

0 comments on commit daa43fa

Please sign in to comment.