From 4498e2b25bc17ace502b4c86c551dbab25f284bf Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Wed, 27 May 2015 15:35:56 -0700 Subject: [PATCH] Adjust starting address of TC automatically. Summary: HHVM performance can be affected by the size of the .text section. Every size increase of 2MB causes TC to start at a new boundary. It appears that if the new boundary is offset from the start of hot text by an even number of 2MB pages, we get penalized. This diff is meant to stabilize performance by keeping the distance at an odd number of 2MB pages regardless of .text size and location. Reviewed By: @markw65 Differential Revision: D2103054 --- hphp/runtime/base/program-functions.cpp | 11 +------- hphp/runtime/base/runtime-option.h | 1 + hphp/util/code-cache.cpp | 34 ++++++++++++++++++++++--- hphp/util/code-cache.h | 11 +++++++- 4 files changed, 43 insertions(+), 14 deletions(-) diff --git a/hphp/runtime/base/program-functions.cpp b/hphp/runtime/base/program-functions.cpp index 5108d843472b76..de7ddeb75fce36 100644 --- a/hphp/runtime/base/program-functions.cpp +++ b/hphp/runtime/base/program-functions.cpp @@ -69,6 +69,7 @@ #include "hphp/runtime/vm/runtime.h" #include "hphp/runtime/vm/treadmill.h" #include "hphp/system/constants.h" +#include "hphp/util/code-cache.h" #include "hphp/util/compatibility.h" #include "hphp/util/capability.h" #include "hphp/util/current-executable.h" @@ -717,16 +718,6 @@ void execute_command_line_end(int xhprof, bool coverage, const char *program) { } } -#if defined(__APPLE__) || defined(__CYGWIN__) -const void* __hot_start = nullptr; -const void* __hot_end = nullptr; -#else -extern "C" { -void __attribute__((__weak__)) __hot_start(); -void __attribute__((__weak__)) __hot_end(); -} -#endif - #if FACEBOOK && defined USE_SSECRC // Overwrite the functiosn NEVER_INLINE void copyFunc(void* dst, void* src, uint32_t sz = 64) { diff --git a/hphp/runtime/base/runtime-option.h b/hphp/runtime/base/runtime-option.h index 4380dadd698228..52395ebdd4aa72 100644 
--- a/hphp/runtime/base/runtime-option.h +++ b/hphp/runtime/base/runtime-option.h @@ -420,6 +420,7 @@ class RuntimeOption { F(uint64_t, JitAProfSize, 64 << 20) \ F(uint64_t, JitAColdSize, 24 << 20) \ F(uint64_t, JitAFrozenSize, 40 << 20) \ + F(uint32_t, JitAutoTCShift, 1) \ F(uint64_t, JitGlobalDataSize, kJitGlobalDataDef) \ F(uint64_t, JitRelocationSize, kJitRelocationSizeDefault) \ F(bool, JitTimer, kJitTimerDefault) \ diff --git a/hphp/util/code-cache.cpp b/hphp/util/code-cache.cpp index 5148ab5724dd97..91120b874798fc 100644 --- a/hphp/util/code-cache.cpp +++ b/hphp/util/code-cache.cpp @@ -25,6 +25,11 @@ namespace HPHP { TRACE_SET_MOD(mcg); +#if defined(__APPLE__) || defined(__CYGWIN__) +const void* __hot_start = nullptr; +const void* __hot_end = nullptr; +#endif + // This value should be enough bytes to emit the "main" part of a minimal // translation, which consists of a move and a jump. static const int kMinTranslationBytes = 16; @@ -56,6 +61,7 @@ CodeCache::CodeCache() static const size_t kRoundUp = 2 << 20; auto ru = [=] (size_t sz) { return sz + (-sz & (kRoundUp - 1)); }; + auto rd = [=] (size_t sz) { return sz & ~(kRoundUp - 1); }; const size_t kAHotSize = ru(RuntimeOption::EvalJitAHotSize); const size_t kASize = ru(RuntimeOption::EvalJitASize); @@ -99,15 +105,36 @@ CodeCache::CodeCache() // Using sbrk to ensure its in the bottom 2G, so we avoid the need for // trampolines, and get to use shorter instructions for tc addresses. size_t allocationSize = m_totalSize; + size_t baseAdjustment = 0; uint8_t* base = (uint8_t*)sbrk(0); + + // Adjust the start of TC relative to hot runtime code. What really matters + // is a number of 2MB pages in-between. We appear to benefit from odd numbers. + auto const shiftTC = [&]() -> size_t { + if (!RuntimeOption::EvalJitAutoTCShift) return 0; + // Make sure the offset from hot text is either odd or even number + // of huge pages. 
+ const auto hugePagesDelta = (ru(reinterpret_cast<size_t>(base)) - + rd(reinterpret_cast<size_t>(__hot_start))) / + kRoundUp; + return ((hugePagesDelta & 1) == (RuntimeOption::EvalJitAutoTCShift & 1)) + ? 0 + : kRoundUp; + }; + if (base != (uint8_t*)-1) { assert(!(allocationSize & (kRoundUp - 1))); // Make sure that we have space to round up to the start of a huge page allocationSize += -(uint64_t)base & (kRoundUp - 1); + allocationSize += shiftTC(); base = (uint8_t*)sbrk(allocationSize); + baseAdjustment = allocationSize - m_totalSize; } if (base == (uint8_t*)-1) { allocationSize = m_totalSize + kRoundUp - 1; + if (RuntimeOption::EvalJitAutoTCShift) { + allocationSize += kRoundUp; + } base = (uint8_t*)low_malloc(allocationSize); if (!base) { base = (uint8_t*)malloc(allocationSize); @@ -117,12 +144,14 @@ CodeCache::CodeCache() allocationSize); exit(1); } + baseAdjustment = -(uint64_t)base & (kRoundUp - 1); + baseAdjustment += shiftTC(); } else { low_malloc_skip_huge(base, base + allocationSize - 1); } assert(base); + base += baseAdjustment; m_base = base; - base += -(uint64_t)base & (kRoundUp - 1); numa_interleave(base, m_totalSize); @@ -137,7 +166,6 @@ CodeCache::CodeCache() m_main.init(base, kASize, "main"); enhugen(base, RuntimeOption::EvalTCNumHugeHotMB); - m_mainBase = base; base += kASize; TRACE(1, "init aprof @%p\n", base); @@ -163,7 +191,7 @@ CodeCache::CodeCache() unprotect(); assert(base - m_base <= allocationSize); - assert(base - m_base + kRoundUp > allocationSize); + assert(base - m_base + 2 * kRoundUp > allocationSize); } CodeCache::~CodeCache() { diff --git a/hphp/util/code-cache.h b/hphp/util/code-cache.h index 0576f43cb87c9b..cbf69844f53881 100644 --- a/hphp/util/code-cache.h +++ b/hphp/util/code-cache.h @@ -22,6 +22,16 @@ namespace HPHP { +#if defined(__APPLE__) || defined(__CYGWIN__) +extern const void* __hot_start; +extern const void* __hot_end; +#else +extern "C" { +void __attribute__((__weak__)) __hot_start(); +void __attribute__((__weak__)) __hot_end(); +} 
+#endif + struct CodeCache { enum class Selection { Default, // 'main' @@ -89,7 +99,6 @@ struct CodeCache { private: CodeAddress m_base; - CodeAddress m_mainBase; size_t m_codeSize; size_t m_totalSize; Selection m_selection;