Skip to content

Commit

Permalink
Fix ppc32 on Linux musl, NetBSD, OpenBSD; fixes #120
Browse files Browse the repository at this point in the history
This fixes fcontext on my PowerBook G4 running Void Linux
ppc-musl-20190901, NetBSD/macppc 8.1, or OpenBSD/macppc 6.6-current,
all with g++.  These systems use fcontext for *ppc32_sysv_elf*
(PowerPC 32-bit System V ELF).  The assembly code was wrong for the
BSDs and crashed on Linux musl.

Linux returns a transfer_t in memory (through a hidden pointer in R3),
but other systems (at least NetBSD and OpenBSD) return a transfer_t in
registers R3:R4.  jump_fcontext() and ontop_fcontext() were always
using the hidden pointer.  Add checks for `#ifdef __linux__`; start
using R3:R4 on other systems.

make_fcontext() was calling _exit(0) through the insecure BSS PLT.
Set R30 to use the secure PLT.  This prevents a crash when musl's
ld.so loads the executable; musl seems to require the secure PLT.

Fix ontop_fcontext() to restore the hidden pointer on Linux.  It was
passing the wrong context's hidden pointer to the ontop-function fn(),
so fn() returned a transfer_t to the wrong stack.  When fn() was
context_exit() in <boost/context/continuation_fcontext.hpp>, it freed
the old stack, then returned `transfer_t{ nullptr, nullptr }` into the
just-freed memory.  This crashed on Linux musl.

Now that ontop_fcontext() restores the hidden pointer, it must stop
abusing the same pointer to pass a transfer_t argument to fn().  Add a
new ontop_fcontext_tail() in C++, which takes arguments in registers
and allocates a transfer_t.  The code is in C++ so it can free the
transfer_t argument if fn() throws a C++ exception.

Rearrange the context frame to shrink it from 244 to 240 bytes.  This
fixes the stack alignment: the ABI requires R1 % 16 == 0, and
make_fcontext() respects this, but jump_fcontext() was adding 244 to
R1, so the new context ran with a misaligned stack (244 % 16 == 4).

Remove R13 from the context frame, so new contexts stop loading R13
with garbage.  The ABI uses R13 to point to the executable's small
data, so R13 should have the same value in every context.

Add the backchain to the context frame; make room by moving LR to the
caller's frame.  Order CR, R14 to R31, F14 to F31 at the frame's end,
as is typical for this ABI.  Provide 8-byte alignment for FPSCR and
F14 to F31, to avoid a misalignment penalty.
  • Loading branch information
kernigh committed Oct 19, 2019
1 parent a4bdc60 commit df8fb6b
Show file tree
Hide file tree
Showing 5 changed files with 373 additions and 370 deletions.
2 changes: 2 additions & 0 deletions build/Jamfile.v2
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ alias asm_sources
: asm/make_ppc32_sysv_elf_gas.S
asm/jump_ppc32_sysv_elf_gas.S
asm/ontop_ppc32_sysv_elf_gas.S
asm/tail_ppc32_sysv_elf_gas.cpp
: <abi>sysv
<address-model>32
<architecture>power
Expand All @@ -287,6 +288,7 @@ alias asm_sources
: asm/make_ppc32_sysv_elf_gas.S
asm/jump_ppc32_sysv_elf_gas.S
asm/ontop_ppc32_sysv_elf_gas.S
asm/tail_ppc32_sysv_elf_gas.cpp
: <abi>sysv
<address-model>32
<architecture>power
Expand Down
294 changes: 144 additions & 150 deletions src/asm/jump_ppc32_sysv_elf_gas.S
Original file line number Diff line number Diff line change
Expand Up @@ -5,71 +5,48 @@
http://www.boost.org/LICENSE_1_0.txt)
*/

/******************************************************
/*******************************************************
* *
* ------------------------------------------------- *
* | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
* ------------------------------------------------- *
* | 0 | 4 | 8 | 12 | 16 | 20 | 24 | 28 | *
* ------------------------------------------------- *
* | F14 | F15 | F16 | F17 | *
* ------------------------------------------------- *
* |bchai|hiddn| fpscr | PC | CR | R14 | R15 | *
* ------------------------------------------------- *
* | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
* ------------------------------------------------- *
* | 32 | 36 | 40 | 44 | 48 | 52 | 56 | 60 | *
* ------------------------------------------------- *
* | F18 | F19 | F20 | F21 | *
* ------------------------------------------------- *
* | R16 | R17 | R18 | R19 | R20 | R21 | R22 | R23 | *
* ------------------------------------------------- *
* | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | *
* ------------------------------------------------- *
* | 64 | 68 | 72 | 76 | 80 | 84 | 88 | 92 | *
* ------------------------------------------------- *
* | F22 | F23 | F24 | F25 | *
* ------------------------------------------------- *
* | R24 | R25 | R26 | R27 | R28 | R29 | R30 | R31 | *
* ------------------------------------------------- *
* | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | *
* ------------------------------------------------- *
* | 96 | 100 | 104 | 108 | 112 | 116 | 120 | 124 | *
* ------------------------------------------------- *
* | F26 | F27 | F28 | F29 | *
* ------------------------------------------------- *
* | F14 | F15 | F16 | F17 | *
* ------------------------------------------------- *
* | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | *
* ------------------------------------------------- *
* | 128 | 132 | 136 | 140 | 144 | 148 | 152 | 156 | *
* ------------------------------------------------- *
* | F30 | F31 | fpscr | R13 | R14 | *
* ------------------------------------------------- *
* | F18 | F19 | F20 | F21 | *
* ------------------------------------------------- *
* | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | *
* ------------------------------------------------- *
* | 160 | 164 | 168 | 172 | 176 | 180 | 184 | 188 | *
* ------------------------------------------------- *
* | R15 | R16 | R17 | R18 | R19 | R20 | R21 | R22 | *
* ------------------------------------------------- *
* | F22 | F23 | F24 | F25 | *
* ------------------------------------------------- *
* | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | *
* ------------------------------------------------- *
* | 192 | 196 | 200 | 204 | 208 | 212 | 216 | 220 | *
* ------------------------------------------------- *
* | R23 | R24 | R25 | R26 | R27 | R28 | R29 | R30 | *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | *
* ------------------------------------------------- *
* | 224 | 228 | 232 | 236 | 240 | 244 | 248 | 252 | *
* ------------------------------------------------- *
* | R31 |hiddn| CR | LR | PC |bchai|linkr| FCTX| *
* ------------------------------------------------- *
* ------------------------------------------------- *
* | 64 | | *
* ------------------------------------------------- *
* | 256 | | *
* ------------------------------------------------- *
* | DATA| | *
* | F26 | F27 | F28 | F29 | *
* ------------------------------------------------- *
* ------------------------|------------ *
* | 224 | 228 | 232 | 236 | 240 | 244 | *
* ------------------------|------------ *
* | F30 | F31 |bchai| LR | *
* ------------------------|------------ *
* *
*******************************************************/

Expand All @@ -79,125 +56,142 @@
.align 2
.type jump_fcontext,@function
jump_fcontext:
# reserve space on stack
subi %r1, %r1, 244

stfd %f14, 0(%r1) # save F14
stfd %f15, 8(%r1) # save F15
stfd %f16, 16(%r1) # save F16
stfd %f17, 24(%r1) # save F17
stfd %f18, 32(%r1) # save F18
stfd %f19, 40(%r1) # save F19
stfd %f20, 48(%r1) # save F20
stfd %f21, 56(%r1) # save F21
stfd %f22, 64(%r1) # save F22
stfd %f23, 72(%r1) # save F23
stfd %f24, 80(%r1) # save F24
stfd %f25, 88(%r1) # save F25
stfd %f26, 96(%r1) # save F26
stfd %f27, 104(%r1) # save F27
stfd %f28, 112(%r1) # save F28
stfd %f29, 120(%r1) # save F29
stfd %f30, 128(%r1) # save F30
stfd %f31, 136(%r1) # save F31
mffs %f0 # load FPSCR
stfd %f0, 144(%r1) # save FPSCR

stw %r13, 152(%r1) # save R13
stw %r14, 156(%r1) # save R14
stw %r15, 160(%r1) # save R15
stw %r16, 164(%r1) # save R16
stw %r17, 168(%r1) # save R17
stw %r18, 172(%r1) # save R18
stw %r19, 176(%r1) # save R19
stw %r20, 180(%r1) # save R20
stw %r21, 184(%r1) # save R21
stw %r22, 188(%r1) # save R22
stw %r23, 192(%r1) # save R23
stw %r24, 196(%r1) # save R24
stw %r25, 200(%r1) # save R25
stw %r26, 204(%r1) # save R26
stw %r27, 208(%r1) # save R27
stw %r28, 212(%r1) # save R28
stw %r29, 216(%r1) # save R29
stw %r30, 220(%r1) # save R30
stw %r31, 224(%r1) # save R31
stw %r3, 228(%r1) # save hidden

# save CR
mfcr %r0
stw %r0, 232(%r1)
# save LR
mflr %r0
stw %r0, 236(%r1)
# save LR as PC
stw %r0, 240(%r1)

# store RSP (pointing to context-data) in R6
mr %r6, %r1

# restore RSP (pointing to context-data) from R4
mr %r1, %r4

lfd %f14, 0(%r1) # restore F14
lfd %f15, 8(%r1) # restore F15
lfd %f16, 16(%r1) # restore F16
lfd %f17, 24(%r1) # restore F17
lfd %f18, 32(%r1) # restore F18
lfd %f19, 40(%r1) # restore F19
lfd %f20, 48(%r1) # restore F20
lfd %f21, 56(%r1) # restore F21
lfd %f22, 64(%r1) # restore F22
lfd %f23, 72(%r1) # restore F23
lfd %f24, 80(%r1) # restore F24
lfd %f25, 88(%r1) # restore F25
lfd %f26, 96(%r1) # restore F26
lfd %f27, 104(%r1) # restore F27
lfd %f28, 112(%r1) # restore F28
lfd %f29, 120(%r1) # restore F29
lfd %f30, 128(%r1) # restore F30
lfd %f31, 136(%r1) # restore F31
lfd %f0, 144(%r1) # load FPSCR
mtfsf 0xff, %f0 # restore FPSCR

lwz %r13, 152(%r1) # restore R13
lwz %r14, 156(%r1) # restore R14
lwz %r15, 160(%r1) # restore R15
lwz %r16, 164(%r1) # restore R16
lwz %r17, 168(%r1) # restore R17
lwz %r18, 172(%r1) # restore R18
lwz %r19, 176(%r1) # restore R19
lwz %r20, 180(%r1) # restore R20
lwz %r21, 184(%r1) # restore R21
lwz %r22, 188(%r1) # restore R22
lwz %r23, 192(%r1) # restore R23
lwz %r24, 196(%r1) # restore R24
lwz %r25, 200(%r1) # restore R25
lwz %r26, 204(%r1) # restore R26
lwz %r27, 208(%r1) # restore R27
lwz %r28, 212(%r1) # restore R28
lwz %r29, 216(%r1) # restore R29
lwz %r30, 220(%r1) # restore R30
lwz %r31, 224(%r1) # restore R31
lwz %r3, 228(%r1) # restore hidden

# restore CR
lwz %r0, 232(%r1)
mtcr %r0
# restore LR
lwz %r0, 236(%r1)
# Linux: jump_fcontext( hidden transfer_t * R3, R4, R5)
# Other: transfer_t R3:R4 = jump_fcontext( R3, R4)

mflr %r0 # return address from LR
mffs %f0 # FPSCR
mfcr %r8 # condition register

stwu %r1, -240(%r1) # allocate stack space, R1 % 16 == 0
stw %r0, 244(%r1) # save LR in caller's frame

#ifdef __linux__
stw %r3, 4(%r1) # hidden pointer
#endif

stfd %f0, 8(%r1) # FPSCR
stw %r0, 16(%r1) # LR as PC
stw %r8, 20(%r1) # CR

# Save registers R14 to R31.
# Don't change R2, the thread-local storage pointer.
# Don't change R13, the small data pointer.
stw %r14, 24(%r1)
stw %r15, 28(%r1)
stw %r16, 32(%r1)
stw %r17, 36(%r1)
stw %r18, 40(%r1)
stw %r19, 44(%r1)
stw %r20, 48(%r1)
stw %r21, 52(%r1)
stw %r22, 56(%r1)
stw %r23, 60(%r1)
stw %r24, 64(%r1)
stw %r25, 68(%r1)
stw %r26, 72(%r1)
stw %r27, 76(%r1)
stw %r28, 80(%r1)
stw %r29, 84(%r1)
stw %r30, 88(%r1)
stw %r31, 92(%r1)

# Save registers F14 to F31 in slots with 8-byte alignment.
# 4-byte alignment may stall the pipeline of some processors.
# Less than 4 may cause alignment traps.
stfd %f14, 96(%r1)
stfd %f15, 104(%r1)
stfd %f16, 112(%r1)
stfd %f17, 120(%r1)
stfd %f18, 128(%r1)
stfd %f19, 136(%r1)
stfd %f20, 144(%r1)
stfd %f21, 152(%r1)
stfd %f22, 160(%r1)
stfd %f23, 168(%r1)
stfd %f24, 176(%r1)
stfd %f25, 184(%r1)
stfd %f26, 192(%r1)
stfd %f27, 200(%r1)
stfd %f28, 208(%r1)
stfd %f29, 216(%r1)
stfd %f30, 224(%r1)
stfd %f31, 232(%r1)

# store RSP (pointing to context-data) in R7/R6
# restore RSP (pointing to context-data) from R4/R3
#ifdef __linux__
mr %r7, %r1
mr %r1, %r4
lwz %r3, 4(%r1) # hidden pointer
#else
mr %r6, %r1
mr %r1, %r3
#endif

lfd %f0, 8(%r1) # FPSCR
lwz %r0, 16(%r1) # PC
lwz %r8, 20(%r1) # CR

mtfsf 0xff, %f0 # restore FPSCR
mtctr %r0 # load CTR with PC
mtcr %r8 # restore CR

# restore R14 to R31
lwz %r14, 24(%r1)
lwz %r15, 28(%r1)
lwz %r16, 32(%r1)
lwz %r17, 36(%r1)
lwz %r18, 40(%r1)
lwz %r19, 44(%r1)
lwz %r20, 48(%r1)
lwz %r21, 52(%r1)
lwz %r22, 56(%r1)
lwz %r23, 60(%r1)
lwz %r24, 64(%r1)
lwz %r25, 68(%r1)
lwz %r26, 72(%r1)
lwz %r27, 76(%r1)
lwz %r28, 80(%r1)
lwz %r29, 84(%r1)
lwz %r30, 88(%r1)
lwz %r31, 92(%r1)

# restore F14 to F31
lfd %f14, 96(%r1)
lfd %f15, 104(%r1)
lfd %f16, 112(%r1)
lfd %f17, 120(%r1)
lfd %f18, 128(%r1)
lfd %f19, 136(%r1)
lfd %f20, 144(%r1)
lfd %f21, 152(%r1)
lfd %f22, 160(%r1)
lfd %f23, 168(%r1)
lfd %f24, 176(%r1)
lfd %f25, 184(%r1)
lfd %f26, 192(%r1)
lfd %f27, 200(%r1)
lfd %f28, 208(%r1)
lfd %f29, 216(%r1)
lfd %f30, 224(%r1)
lfd %f31, 232(%r1)

# restore LR from caller's frame
lwz %r0, 244(%r1)
mtlr %r0
# load PC
lwz %r0, 240(%r1)
# restore CTR
mtctr %r0

# adjust stack
addi %r1, %r1, 244
addi %r1, %r1, 240

# return transfer_t
stw %r6, 0(%r3)
# return transfer_t
#ifdef __linux__
stw %r7, 0(%r3)
stw %r5, 4(%r3)
#else
mr %r3, %r6
# %r4, %r4
#endif

# jump to context
bctr
Expand Down
Loading

0 comments on commit df8fb6b

Please sign in to comment.