From c9ee7cfbf6c678e52848b9dd11de6665ef07efe7 Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Wed, 19 Jul 2017 13:46:32 +1000 Subject: [PATCH 1/5] Portable aligned alloc implementation for heap allocated LRUCacheShard Signed-off-by: Daniel Black --- cache/lru_cache.cc | 8 ++++++++ cache/lru_cache.h | 5 +++++ port/port_posix.cc | 17 +++++++++++++++++ port/port_posix.h | 4 ++++ port/win/port_win.h | 9 +++++++++ 5 files changed, 43 insertions(+) diff --git a/cache/lru_cache.cc b/cache/lru_cache.cc index 2a4c0f77a08..5d726c204ff 100644 --- a/cache/lru_cache.cc +++ b/cache/lru_cache.cc @@ -233,6 +233,14 @@ void LRUCacheShard::EvictFromLRU(size_t charge, } } +void* LRUCacheShard::operator new(size_t size) { + return rocksdb::port::cacheline_aligned_alloc(size); +} + +void LRUCacheShard::operator delete(void *memblock) { + rocksdb::port::cacheline_aligned_free(memblock); +} + void LRUCacheShard::SetCapacity(size_t capacity) { autovector last_reference_list; { diff --git a/cache/lru_cache.h b/cache/lru_cache.h index 5fbe0f26459..36606207547 100644 --- a/cache/lru_cache.h +++ b/cache/lru_cache.h @@ -202,6 +202,11 @@ class LRUCacheShard : public CacheShard { // not threadsafe size_t TEST_GetLRUSize(); + // Overloading to aligned it to cache line size + void* operator new(size_t); + + void operator delete(void *); + private: void LRU_Remove(LRUHandle* e); void LRU_Insert(LRUHandle* e); diff --git a/port/port_posix.cc b/port/port_posix.cc index 59241daff44..ee073a55d3f 100644 --- a/port/port_posix.cc +++ b/port/port_posix.cc @@ -184,5 +184,22 @@ int GetMaxOpenFiles() { return -1; } +void *cacheline_aligned_alloc(size_t size) { +#if defined (_ISOC11_SOURCE) + return aligned_alloc(CACHE_LINE_SIZE, size); +#elif ( _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || defined(__APPLE__)) + void *m; + errno = posix_memalign(&m, CACHE_LINE_SIZE, size); + return errno ? 
NULL : m; +#else + return malloc(size); +#endif +} + +void cacheline_aligned_free(void *memblock) { + free(memblock); +} + + } // namespace port } // namespace rocksdb diff --git a/port/port_posix.h b/port/port_posix.h index 72beb0409f3..967315ed27e 100644 --- a/port/port_posix.h +++ b/port/port_posix.h @@ -193,6 +193,10 @@ extern void InitOnce(OnceType* once, void (*initializer)()); #endif #endif +extern void *cacheline_aligned_alloc(size_t size); + +extern void cacheline_aligned_free(void *memblock); + #define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality) extern void Crash(const std::string& srcfile, int srcline); diff --git a/port/win/port_win.h b/port/win/port_win.h index bbc5feec31b..4452da8b312 100644 --- a/port/win/port_win.h +++ b/port/win/port_win.h @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -239,6 +240,14 @@ extern void InitOnce(OnceType* once, void (*initializer)()); #define CACHE_LINE_SIZE 64U #endif +inline void *cacheline_aligned_alloc(size_t size) { return _aligned_malloc(size, CACHE_LINE_SIZE); } + +inline void cacheline_aligned_free(void *memblock) { _aligned_free(memblock); } + static inline void AsmVolatilePause() { #if defined(_M_IX86) || defined(_M_X64) YieldProcessor(); From 5af39617a87c40749362fc33928146e9a9c9051c Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Tue, 11 Jul 2017 12:36:05 -0700 Subject: [PATCH 2/5] Reorder and align LRUCacheShard data members to reduce false sharing --- cache/lru_cache.cc | 4 ++-- cache/lru_cache.h | 37 ++++++++++++++++++++++++------------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/cache/lru_cache.cc b/cache/lru_cache.cc index 5d726c204ff..764766a41dc 100644 --- a/cache/lru_cache.cc +++ b/cache/lru_cache.cc @@ -22,7 +22,7 @@ namespace rocksdb { -LRUHandleTable::LRUHandleTable() : length_(0), elems_(0), list_(nullptr) { +LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) { Resize(); } @@ -100,7 +100,7 @@ void 
LRUHandleTable::Resize() { } LRUCacheShard::LRUCacheShard() - : usage_(0), lru_usage_(0), high_pri_pool_usage_(0) { + : high_pri_pool_usage_(0), usage_(0), lru_usage_(0) { // Make empty circular linked list lru_.next = &lru_; lru_.prev = &lru_; diff --git a/cache/lru_cache.h b/cache/lru_cache.h index 36606207547..02fdddc489d 100644 --- a/cache/lru_cache.h +++ b/cache/lru_cache.h @@ -148,13 +148,13 @@ class LRUHandleTable { // The table consists of an array of buckets where each bucket is // a linked list of cache entries that hash into the bucket. + LRUHandle** list_; uint32_t length_; uint32_t elems_; - LRUHandle** list_; }; // A single shard of sharded cache. -class LRUCacheShard : public CacheShard { +class alignas(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard { public: LRUCacheShard(); virtual ~LRUCacheShard(); @@ -228,12 +228,6 @@ class LRUCacheShard : public CacheShard { // Initialized before use. size_t capacity_; - // Memory size for entries residing in the cache - size_t usage_; - - // Memory size for entries residing only in the LRU list - size_t lru_usage_; - // Memory size for entries in high-pri pool. size_t high_pri_pool_usage_; @@ -247,11 +241,6 @@ class LRUCacheShard : public CacheShard { // Remember the value to avoid recomputing each time. double high_pri_pool_capacity_; - // mutex_ protects the following state. - // We don't count mutex_ as the cache's internal state so semantically we - // don't mind mutex_ invoking the non-const actions. - mutable port::Mutex mutex_; - // Dummy head of LRU list. // lru.prev is newest entry, lru.next is oldest entry. // LRU contains items which can be evicted, ie reference only by cache @@ -260,7 +249,29 @@ class LRUCacheShard : public CacheShard { // Pointer to head of low-pri pool in LRU list. 
LRUHandle* lru_low_pri_; + // ------------^^^^^^^^^^^^^----------- + // Not frequently modified data members + // ------------------------------------ + // + // We separate data members that are updated frequently from the ones that + // are not frequently updated so that they don't share the same cache line + // which will lead into false cache sharing + // + // ------------------------------------ + // Frequently modified data members + // ------------vvvvvvvvvvvvv----------- LRUHandleTable table_; + + // Memory size for entries residing in the cache + size_t usage_; + + // Memory size for entries residing only in the LRU list + size_t lru_usage_; + + // mutex_ protects the following state. + // We don't count mutex_ as the cache's internal state so semantically we + // don't mind mutex_ invoking the non-const actions. + mutable port::Mutex mutex_; }; class LRUCache : public ShardedCache { From 0df89f1be344a7ced910aa4be80e75a507220942 Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Thu, 13 Jul 2017 11:59:58 -0700 Subject: [PATCH 3/5] fix windows --- cache/lru_cache.h | 2 +- port/port_posix.h | 3 +++ port/win/port_win.h | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cache/lru_cache.h b/cache/lru_cache.h index 02fdddc489d..2fd44bbce50 100644 --- a/cache/lru_cache.h +++ b/cache/lru_cache.h @@ -154,7 +154,7 @@ class LRUHandleTable { }; // A single shard of sharded cache. 
-class alignas(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard { +class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard { public: LRUCacheShard(); virtual ~LRUCacheShard(); diff --git a/port/port_posix.h b/port/port_posix.h index 967315ed27e..fe0d42644c4 100644 --- a/port/port_posix.h +++ b/port/port_posix.h @@ -193,10 +193,13 @@ extern void InitOnce(OnceType* once, void (*initializer)()); #endif #endif + extern void *cacheline_aligned_alloc(size_t size); extern void cacheline_aligned_free(void *memblock); +#define ALIGN_AS(n) alignas(n) + #define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality) extern void Crash(const std::string& srcfile, int srcline); diff --git a/port/win/port_win.h b/port/win/port_win.h index 4452da8b312..ad26fe6078a 100644 --- a/port/win/port_win.h +++ b/port/win/port_win.h @@ -240,6 +240,7 @@ extern void InitOnce(OnceType* once, void (*initializer)()); #define CACHE_LINE_SIZE 64U #endif + inline void *cacheline_aligned_alloc(size_t size) { return _aligned_malloc(size, CACHE_LINE_SIZE); } @@ -248,6 +249,8 @@ inline void cacheline_aligned_free(void *memblock) { _aligned_free(memblock); } +#define ALIGN_AS(n) __declspec(align(n)) + static inline void AsmVolatilePause() { #if defined(_M_IX86) || defined(_M_X64) YieldProcessor(); From 1dec7cf43eb6db7e2970651506c54fadd1cc54f6 Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Thu, 20 Jul 2017 09:49:52 +1000 Subject: [PATCH 4/5] Disable MSVC errors due to alignment Signed-off-by: Daniel Black --- cache/lru_cache.cc | 7 +++++++ cache/lru_cache_test.cc | 11 ++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/cache/lru_cache.cc b/cache/lru_cache.cc index 764766a41dc..f833374e73c 100644 --- a/cache/lru_cache.cc +++ b/cache/lru_cache.cc @@ -457,7 +457,14 @@ LRUCache::LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit, double high_pri_pool_ratio) : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) { num_shards_ = 1 
<< num_shard_bits; +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable: 4316) // We've validated the alignment with the new operators +#endif shards_ = new LRUCacheShard[num_shards_]; +#if defined(_MSC_VER) +#pragma warning(pop) +#endif SetCapacity(capacity); SetStrictCapacityLimit(strict_capacity_limit); for (int i = 0; i < num_shards_; i++) { diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc index 87794fd1617..1b83033c36c 100644 --- a/cache/lru_cache_test.cc +++ b/cache/lru_cache_test.cc @@ -17,7 +17,16 @@ class LRUCacheTest : public testing::Test { ~LRUCacheTest() {} void NewCache(size_t capacity, double high_pri_pool_ratio = 0.0) { - cache_.reset(new LRUCacheShard()); + cache_.reset( +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable: 4316) // We've validated the alignment with the new operators +#endif + new LRUCacheShard() +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + ); cache_->SetCapacity(capacity); cache_->SetStrictCapacityLimit(false); cache_->SetHighPriorityPoolRatio(high_pri_pool_ratio); From 666a6868924f249260576ffa78566d3b9d1f045f Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Fri, 21 Jul 2017 11:20:33 +1000 Subject: [PATCH 5/5] disable aligned attribute for mingw32 --- port/win/port_win.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/port/win/port_win.h b/port/win/port_win.h index ad26fe6078a..1ec09068335 100644 --- a/port/win/port_win.h +++ b/port/win/port_win.h @@ -249,7 +249,13 @@ inline void cacheline_aligned_free(void *memblock) { _aligned_free(memblock); } +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52991 for MINGW32 +// could not be worked around with by -mno-ms-bitfields +#ifndef __MINGW32__ #define ALIGN_AS(n) __declspec(align(n)) +#else +#define ALIGN_AS(n) +#endif static inline void AsmVolatilePause() { #if defined(_M_IX86) || defined(_M_X64)