Skip to content

Commit

Permalink
Force ldrd on armv7
Browse files Browse the repository at this point in the history
Summary:
Clang generates suboptimal code for armv7 when loading a 64 bit value
with a register offset that is added rather than subtracted. For
example, when loading from a register, `frameRegs[-offset]` will
compile to:
```
sub.w   r0, r0, r1, lsl #3
ldrd    r0, r1, [r0]
bx      lr
```

Whereas `frameRegs[offset]` will compile to:
```
ldr.w   r2, [r0, r1, lsl #3]
add.w   r0, r0, r1, lsl #3
ldr     r1, [r0, #4]
mov     r0, r2
bx      lr
```

By using inline asm, we can force clang to perform the add as a
separate operation before the load, which prevents it from being folded
into the load and allows it to use the more efficient 64 bit load
instead.

Reviewed By: kodafb

Differential Revision: D31030620

fbshipit-source-id: 9620205e6382943c4a113d00c42d558bb5fbcdfc
  • Loading branch information
neildhar authored and facebook-github-bot committed Sep 28, 2021
1 parent d520bca commit bbc8ab0
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 1 deletion.
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ set(HERMESVM_HEAP_SEGMENT_SIZE_KB 4096
set(HERMESVM_ALLOW_CONCURRENT_GC ON CACHE BOOL
"Enable concurrency in the GC for 64-bit builds.")

set(HERMESVM_ALLOW_INLINE_ASM ON CACHE BOOL
"Allow the use of inline assembly in VM code.")

set(HERMESVM_API_TRACE_ANDROID_REPLAY OFF CACHE BOOL
"Simulate Android config on Linux in API tracing.")

Expand Down Expand Up @@ -349,6 +352,9 @@ add_definitions(-DHERMESVM_HEAP_SEGMENT_SIZE_KB=${HERMESVM_HEAP_SEGMENT_SIZE_KB}
if(HERMESVM_ALLOW_CONCURRENT_GC)
add_definitions(-DHERMESVM_ALLOW_CONCURRENT_GC)
endif()
if(HERMESVM_ALLOW_INLINE_ASM)
add_definitions(-DHERMESVM_ALLOW_INLINE_ASM)
endif()
if(HERMESVM_API_TRACE_ANDROID_REPLAY)
add_definitions(-DHERMESVM_API_TRACE_ANDROID_REPLAY)
endif()
Expand Down
16 changes: 16 additions & 0 deletions lib/VM/Interpreter-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,23 @@
#include "hermes/VM/Interpreter.h"

// Convenient aliases for operand registers.
#if !defined(__arm__) || !defined(__clang__) || \
!defined(HERMESVM_ALLOW_INLINE_ASM)
#define REG(index) frameRegs[index]
#else
// Work around bad codegen from clang on armv7 that causes it to emit two
// separate loads instead of a single ldrd.
#define REG(index) \
(*({ \
PinnedHermesValue *addr; \
static_assert(sizeof(PinnedHermesValue) == 8, "asm depends on HV size"); \
static_assert(sizeof(index) <= 4, "index must fit in a register"); \
asm("add.w %0, %1, %2, lsl #3" \
: "=r"(addr) \
: "r"(frameRegs), "r"(index)); \
addr; \
}))
#endif
#define O1REG(name) REG(ip->i##name.op1)
#define O2REG(name) REG(ip->i##name.op2)
#define O3REG(name) REG(ip->i##name.op3)
Expand Down
3 changes: 2 additions & 1 deletion lib/VM/Interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,8 @@ static void printDebugInfo(
if (operandType == OperandType::Reg8 || operandType == OperandType::Reg32) {
// Print the register value, if source.
if (i != 0 || decoded.meta.numOperands == 1)
dbgs() << "=" << DumpHermesValue(REG(value.integer));
dbgs() << "="
<< DumpHermesValue(REG(static_cast<uint32_t>(value.integer)));
}
}

Expand Down

0 comments on commit bbc8ab0

Please sign in to comment.