From 7d4daf0c035409f677afd87e8d3b3063447dd63e Mon Sep 17 00:00:00 2001 From: "Aureau, Georges (Kernel Tools ERT)" Date: Thu, 4 Apr 2024 14:39:06 +0000 Subject: [PATCH] x86_64: Fix "bt" command to handle IRQ exception frames properly On x86_64, there are cases where crash cannot handle IRQ exception frames properly. For example, with RHEL9.3 kernel, "bt" command fails with with "WARNING possibly bogus exception frame": crash> bt -c 30 PID: 2898241 TASK: ff4cb0ce0da0c680 CPU: 30 COMMAND: "star-ccm+" #0 [fffffe4658d88e58] crash_nmi_callback at ffffffffa00675e8 #1 [fffffe4658d88e68] nmi_handle at ffffffffa002ebab ... --- --- ... #13 [ff5eba269937cf90] __do_softirq at ffffffffa0c6c007 #14 [ff5eba269937cfe0] __irq_exit_rcu at ffffffffa010ef61 #15 [ff5eba269937cff0] sysvec_apic_timer_interrupt at ffffffffa0c58ca2 --- --- RIP: 0000000000000010 RSP: 0000000000000018 RFLAGS: ff5eba26ddc9f7e8 RAX: 0000000000000a20 RBX: ff5eba26ddc9f940 RCX: 0000000000001000 RDX: ffffffb559980000 RSI: ff4cb12d67207400 RDI: ffffffffffffffff RBP: 0000000000001000 R8: ff5eba26ddc9f940 R9: ff5eba26ddc9f8af R10: 0000000000000003 R11: 0000000000000a20 R12: ff5eba26ddc9f8b0 R13: 000000283c07f000 R14: ff4cb0f5a29a1c00 R15: 0000000000000001 ORIG_RAX: ffffffffa07c4e60 CS: 0206 SS: 7000001cf0380001 bt: WARNING: possibly bogus exception frame Running "crash" with "--machdep irq_eframe_link=0xffffffffffffffe8" option (i.e. thus irq_eframe_link = -24) works properly: PID: 2898241 TASK: ff4cb0ce0da0c680 CPU: 30 COMMAND: "star-ccm+" #0 [fffffe4658d88e58] crash_nmi_callback at ffffffffa00675e8 #1 [fffffe4658d88e68] nmi_handle at ffffffffa002ebab ... --- --- ... #13 [ff5eba269937cf90] __do_softirq at ffffffffa0c6c007 #14 [ff5eba269937cfe0] __irq_exit_rcu at ffffffffa010ef61 #15 [ff5eba269937cff0] sysvec_apic_timer_interrupt at ffffffffa0c58ca2 --- --- #16 [ff5eba26ddc9f738] asm_sysvec_apic_timer_interrupt at ffffffffa0e00e06 [exception RIP: alloc_pte.constprop.0+32] RIP: ffffffffa07c4e60 RSP: ff5eba26ddc9f7e8 RFLAGS: 00000206 RAX: ff5eba26ddc9f940 RBX: 0000000000001000 RCX: 0000000000000a20 RDX: 0000000000001000 RSI: ffffffb559980000 RDI: ff4cb12d67207400 RBP: ff5eba26ddc9f8b0 R8: ff5eba26ddc9f8af R9: 0000000000000003 R10: 0000000000000a20 R11: ff5eba26ddc9f940 R12: 000000283c07f000 R13: ff4cb0f5a29a1c00 R14: 0000000000000001 R15: ff4cb0f5a29a1bf8 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #17 [ff5eba26ddc9f830] iommu_v1_map_pages at ffffffffa07c5648 #18 [ff5eba26ddc9f8f8] __iommu_map at ffffffffa07d7803 #19 [ff5eba26ddc9f990] iommu_map_sg at ffffffffa07d7b71 #20 [ff5eba26ddc9f9f0] iommu_dma_map_sg at ffffffffa07ddcc9 #21 [ff5eba26ddc9fa90] __dma_map_sg_attrs at ffffffffa01b5205 ... Some background: asm_common_interrupt: callq error_entry movq %rax,%rsp movq %rsp,%rdi movq 0x78(%rsp),%rsi movq $-0x1,0x78(%rsp) call common_interrupt # rsp pointing to regs common_interrupt: pushq %r12 pushq %rbp pushq %rbx [...] movq hardirq_stack_ptr,%r11 movq %rsp,(%r11) movq %r11,%rsp [...] call __common_interrupt # rip:common_interrupt So frame_size(rip:common_interrupt) = 32 (3 push + ret). Hence "machdep->machspec->irq_eframe_link = -32;" (see x86_64_irq_eframe_link_init()). Now: asm_sysvec_apic_timer_interrupt: pushq $-0x1 callq error_entry movq %rax,%rsp movq %rsp,%rdi callq sysvec_apic_timer_interrupt sysvec_apic_timer_interrupt: pushq %r12 pushq %rbp [...] movq hardirq_stack_ptr,%r11 movq %rsp,(%r11) movq %r11,%rsp [...] call __sysvec_apic_timer_interrupt # rip:sysvec_apic_timer_interrupt Here frame_size(rip:sysvec_apic_timer_interrupt) = 24 (2 push + ret) We should also notice that: rip = *(hardirq_stack_ptr - 8) rsp = *(hardirq_stack_ptr) regs = rsp - frame_size(rip) But x86_64_get_framesize() does not work with IRQ handlers (returns 0). So not many options other than hardcoding the most likely value and looking around it. Actually x86_64_irq_eframe_link() was trying -32 (default), and then -40, but not -24. Signed-off-by: Georges Aureau --- x86_64.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/x86_64.c b/x86_64.c index aec82b03..0c21eb82 100644 --- a/x86_64.c +++ b/x86_64.c @@ -6623,13 +6623,14 @@ x86_64_irq_eframe_link_init(void) /* * Calculate and verify the IRQ exception frame location from the - * stack reference at the top of the IRQ stack, possibly adjusting - * the ms->irq_eframe_link value. + * stack reference at the top of the IRQ stack, keep ms->irq_eframe_link + * as the most likely value, and try a few sizes around it. */ static ulong x86_64_irq_eframe_link(ulong stkref, struct bt_info *bt, FILE *ofp) { ulong irq_eframe; + int i, try[] = { 8, -8, 16, -16 }; if (x86_64_exception_frame(EFRAME_VERIFY, stkref, 0, bt, ofp)) return stkref; @@ -6639,9 +6640,9 @@ x86_64_irq_eframe_link(ulong stkref, struct bt_info *bt, FILE *ofp) if (x86_64_exception_frame(EFRAME_VERIFY, irq_eframe, 0, bt, ofp)) return irq_eframe; - if (x86_64_exception_frame(EFRAME_VERIFY, irq_eframe+8, 0, bt, ofp)) { - machdep->machspec->irq_eframe_link -= 8; - return (irq_eframe + 8); + for (i = 0; i < sizeof(try)/sizeof(int); i++) { + if (x86_64_exception_frame(EFRAME_VERIFY, irq_eframe+try[i], 0, bt, ofp)) + return (irq_eframe + try[i]); } return irq_eframe;