Skip to content

Commit

Permalink
ARM64: optimize PREPARE_EXTERNAL_VAR (dotnet#21) (dotnet#8296)
Browse files Browse the repository at this point in the history
There are multiple cases where we need not just the pointer to an external variable but the value that is stored at that location.
Until now this was done with PREPARE_EXTERNAL_VAR followed by an `ldr x?, [x?]`. The PREPARE_EXTERNAL_VAR macro needs two instructions (adrp + add). Because the ldr instruction accepts an immediate offset, the add can be folded into the load for this use case. The two new macros, PREPARE_EXTERNAL_VAR_INDIRECT and PREPARE_EXTERNAL_VAR_INDIRECT_W, make use of this.
  • Loading branch information
RalfKornmannEnvision authored and jkotas committed Sep 13, 2020
1 parent 93ef6c3 commit 86e99b4
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 36 deletions.
3 changes: 1 addition & 2 deletions src/coreclr/src/nativeaot/Runtime/arm64/ExceptionHandling.S
Original file line number Diff line number Diff line change
Expand Up @@ -465,9 +465,8 @@ PopExInfoLoop:
DonePopping:
str x3, [x1, #OFFSETOF__Thread__m_pExInfoStackHead] // store the new head on the Thread

PREPARE_EXTERNAL_VAR RhpTrapThreads, x3
PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, 3

ldr w3, [x3]
tbz x3, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort

ldr x3, [sp, #rsp_offset_is_not_handling_thread_abort]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ POINTER_SIZE = 0x08
// IntPtr RhGetCommonStubAddress()
//
// Returns the address of RhCommonStub in x0.
//
// The address is materialized once, through PREPARE_EXTERNAL_VAR, instead of
// a hand-written adrp/add pair followed by the macro recomputing the same
// value into the same register.
//
LEAF_ENTRY RhGetCommonStubAddress, _TEXT
PREPARE_EXTERNAL_VAR RhCommonStub, x0      // x0 = address of RhCommonStub
ret
LEAF_END RhGetCommonStubAddress, _TEXT

Expand Down
12 changes: 4 additions & 8 deletions src/coreclr/src/nativeaot/Runtime/arm64/PInvoke.S
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,8 @@ Done:

PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x10

PREPARE_EXTERNAL_VAR RhpTrapThreads, x10
PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, 10

ldr w10, [x10]
tbz x10, #TrapThreadsFlags_TrapThreads_Bit, NoWait
bl RhpWaitForGCNoAbort
NoWait:
Expand Down Expand Up @@ -181,9 +180,8 @@ ThreadAttached:
str xzr, [x10, #OFFSETOF__Thread__m_pTransitionFrame]
dmb ish

PREPARE_EXTERNAL_VAR RhpTrapThreads, x11
PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, 11

ldr w11, [x11]
tbnz x11, #TrapThreadsFlags_TrapThreads_Bit, TrapThread

ret
Expand Down Expand Up @@ -294,9 +292,8 @@ NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler
str x10, [x0, #OFFSETOF__PInvokeTransitionFrame__m_pThread]
str x0, [x10, #OFFSETOF__Thread__m_pTransitionFrame]

PREPARE_EXTERNAL_VAR RhpTrapThreads, x9
PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, 9

ldr w9, [x9]
cbnz w9, InvokeRareTrapThread // TrapThreadsFlags_None = 0
ret

Expand All @@ -310,9 +307,8 @@ LEAF_ENTRY RhpPInvokeReturn, _TEXT
mov x10, 0
str x10, [x9, #OFFSETOF__Thread__m_pTransitionFrame]

PREPARE_EXTERNAL_VAR RhpTrapThreads, x9
PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, 9

ldr w9, [x9]
cbnz w9, 0f // TrapThreadsFlags_None = 0
ret
0:
Expand Down
3 changes: 1 addition & 2 deletions src/coreclr/src/nativeaot/Runtime/arm64/StubDispatch.S
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,7 @@
// Calling convention of the universal thunk is:
// xip0: contains target address for the thunk to call
// xip1: contains parameter of the thunks target
adrp xip0, RhpCidResolve
add xip0, xip0, :lo12:RhpCidResolve
PREPARE_EXTERNAL_VAR RhpCidResolve, xip0
mov xip1, x11
b RhpUniversalTransition_DebugStepTailCall
LEAF_END RhpInterfaceDispatchSlow, _TEXT
Expand Down
34 changes: 12 additions & 22 deletions src/coreclr/src/nativeaot/Runtime/arm64/WriteBarriers.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,22 @@
.macro UPDATE_GC_SHADOW destReg, refReg

// If g_GCShadow is 0, don't perform the check.
PREPARE_EXTERNAL_VAR g_GCShadow, X9
ldr x9, [x9]
PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, X9
cbz x9, 1f

// Save destReg since we're about to modify it (and we need the original value both within the macro and
// once we exit the macro).
mov x10, \destReg

// Transform destReg into the equivalent address in the shadow heap.
PREPARE_EXTERNAL_VAR g_lowest_address, X9
ldr x9, [x9]
PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, X9
subs \destReg, \destReg, x9
blt 0f

PREPARE_EXTERNAL_VAR g_GCShadow, X9
ldr x9, [x9]
PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, X9
add \destReg, \destReg, x9

PREPARE_EXTERNAL_VAR g_GCShadowEnd, X9
ldr x9, [x9]
PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadowEnd, X9
cmp \destReg, x9
bgt 0f

Expand Down Expand Up @@ -109,8 +105,8 @@

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
// Update the write watch table if necessary
PREPARE_EXTERNAL_VAR g_write_watch_table, x\trash
ldr x\trash, [x\trash]
PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, x\trash

cbz x\trash, 2f
add x\trash, x\trash, \destReg, lsr #0xc // SoftwareWriteWatch::AddressToTableByteIndexShift
ldrb w17, [x\trash]
Expand All @@ -122,20 +118,17 @@
2:
// We can skip the card table write if the reference is to
// an object not on the ephemeral segment.
PREPARE_EXTERNAL_VAR g_ephemeral_low, x\trash
ldr x\trash, [x\trash]
PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, x\trash
cmp \refReg, x\trash
blt 0f

PREPARE_EXTERNAL_VAR g_ephemeral_high, x\trash
ldr x\trash, [x\trash]
PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, x\trash
cmp \refReg, x\trash
bge 0f

// Set the card for this object, if it has not already been set.

PREPARE_EXTERNAL_VAR g_card_table, x\trash
ldr x\trash, [x\trash]
PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, x\trash
add \trash2, x\trash, \destReg, lsr #11

// Check that this card has not already been written. Avoiding useless writes is a big win on
Expand All @@ -149,8 +142,7 @@

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
// Check if we need to update the card bundle table
PREPARE_EXTERNAL_VAR g_card_bundle_table, x\trash
ldr x\trash, [x\trash]
PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, x\trash
add \trash2, x\trash, \destReg, lsr #21
ldrb w\trash, [\trash2]
cmp x\trash, 0xFF
Expand Down Expand Up @@ -178,13 +170,11 @@
// The "check" of this checked write barrier - is destReg
// within the heap? if no, early out.

PREPARE_EXTERNAL_VAR g_lowest_address, x\trash
ldr x\trash, [x\trash]
PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x\trash
cmp \destReg, x\trash
blt 0f

PREPARE_EXTERNAL_VAR g_highest_address, x\trash
ldr x\trash, [x\trash]
PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x\trash
cmp \destReg, x\trash
bgt 0f

Expand Down
11 changes: 11 additions & 0 deletions src/coreclr/src/nativeaot/Runtime/unix/unixasmmacrosarm64.inc
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ C_FUNC(\Name):
add \HelperReg, \HelperReg, :lo12:C_FUNC(\Name)
.endm

// PREPARE_EXTERNAL_VAR_INDIRECT Name, HelperReg
//
// Loads the 64-bit value stored in the external variable Name into HelperReg.
//
//   Name      - symbol of the external variable (passed through C_FUNC).
//   HelperReg - full 64-bit register name, e.g. x9. It first receives the
//               page address produced by adrp and is then overwritten with
//               the loaded value; no other register is written.
//
// The low 12 bits of the symbol address are supplied as the ldr offset via
// the :lo12: modifier, so only two instructions are emitted (adrp + ldr).
//
// NOTE(review): a 64-bit ldr with an immediate offset scales that offset by 8,
// so Name presumably has to be 8-byte aligned - confirm for every symbol
// this macro is used with.
.macro PREPARE_EXTERNAL_VAR_INDIRECT Name, HelperReg
adrp \HelperReg, C_FUNC(\Name)
ldr \HelperReg, [\HelperReg, :lo12:C_FUNC(\Name)]
.endm

// PREPARE_EXTERNAL_VAR_INDIRECT_W Name, HelperReg
//
// Loads the 32-bit value stored in the external variable Name into the
// w view of the helper register.
//
//   Name      - symbol of the external variable (passed through C_FUNC).
//   HelperReg - register NUMBER without the x or w prefix, e.g. 9. Unlike
//               PREPARE_EXTERNAL_VAR_INDIRECT, this macro adds the prefix
//               itself because it needs both views of the same register:
//               xN holds the page address for adrp, wN receives the value.
//
// After the macro, wN holds the loaded value; writing wN also clears the
// upper 32 bits of xN, so the page address is no longer available.
//
// NOTE(review): a 32-bit ldr with an immediate offset scales that offset by 4,
// so Name presumably has to be 4-byte aligned - confirm.
.macro PREPARE_EXTERNAL_VAR_INDIRECT_W Name, HelperReg
adrp x\HelperReg, C_FUNC(\Name)
ldr w\HelperReg, [x\HelperReg, :lo12:C_FUNC(\Name)]
.endm


.macro PROLOG_STACK_ALLOC Size
sub sp, sp, \Size
.cfi_adjust_cfa_offset \Size
Expand Down

0 comments on commit 86e99b4

Please sign in to comment.