-
Notifications
You must be signed in to change notification settings - Fork 113
/
Copy paththreadalloc.h
308 lines (283 loc) · 9.98 KB
/
threadalloc.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
#pragma once
#include "../ds/helpers.h"
#include "globalalloc.h"
#if defined(SNMALLOC_USE_THREAD_DESTRUCTOR) && \
defined(SNMALLOC_USE_THREAD_CLEANUP)
#error At most one out of SNMALLOC_USE_THREAD_CLEANUP and SNMALLOC_USE_THREAD_DESTRUCTOR may be defined.
#endif
extern "C" void _malloc_thread_cleanup();
namespace snmalloc
{
#ifdef SNMALLOC_EXTERNAL_THREAD_ALLOC
/**
 * `ThreadAlloc` implementation that manages no thread-local state of its
 * own.  It relies on `ThreadAllocUntyped::get` having been declared before
 * snmalloc.h was included; since that declaration comes first it cannot
 * name the allocator type, which is why the result is cast to `Alloc*`.
 *
 * Only used when snmalloc is built as part of a runtime that has its own
 * management of the thread-local allocator pointer.
 */
class ThreadAllocUntypedWrapper
{
protected:
  /// Intentionally empty: this variant manages no thread-local state.
  static void register_cleanup() {}

public:
  /// Returns whatever `ThreadAllocUntyped::get` yields, viewed as `Alloc*`.
  static SNMALLOC_FAST_PATH Alloc* get_noncachable()
  {
    return (Alloc*)ThreadAllocUntyped::get();
  }

  /// Identical to `get_noncachable` for this variant.
  static SNMALLOC_FAST_PATH Alloc* get()
  {
    return get_noncachable();
  }
};
/**
 * Predicate handed to `Allocator` as a template parameter to support lazy
 * replacement of the allocator.  It reports whether the allocator passed in
 * still has to be initialised.  In this configuration the TLS state is
 * managed externally, so the answer is always `false`.
 */
SNMALLOC_FAST_PATH bool needs_initialisation(void* existing)
{
  // The argument is irrelevant here; reference it only to silence
  // unused-parameter diagnostics.
  static_cast<void>(existing);
  return false;
}
/**
* Function passed as a template parameter to `Allocator` to allow lazy
* replacement. With externally managed TLS there is nothing to initialise,
* so this is expected never to be called: it reports a critical error via
* `error` and then invokes `f(nullptr)` so that every path returns a value.
*/
# ifdef _MSC_VER
// 32-bit Windows release builds with MSVC flag the `f(nullptr)` call below as
// unreachable code, which is true, but other platforms do not. Disabling the
// warning (C4702) around this function seems simplest.
# pragma warning(push)
# pragma warning(disable : 4702)
# endif
SNMALLOC_FAST_PATH void* init_thread_allocator(function_ref<void*(void*)> f)
{
error("Critical Error: This should never be called.");
return f(nullptr);
}
# ifdef _MSC_VER
# pragma warning(pop)
# endif
using ThreadAlloc = ThreadAllocUntypedWrapper;
#else
/**
* A global fake allocator object. This never allocates memory and, as a
* result, never owns any slabs. On the slow paths, where it would fetch
* slabs to allocate from, it will discover that it is the placeholder and
* replace itself with the thread-local allocator, allocating one if
* required. This avoids a branch on the fast path.
*
* The fake allocator is a zero initialised area of memory of the correct
* size. All data structures used potentially before initialisation must be
* okay with zero init to move to the slow path, that is, zero must signify
* empty.
*/
inline const char GlobalPlaceHolder[sizeof(Alloc)] = {0};
/**
 * Returns the address of `GlobalPlaceHolder` typed as a mutable `Alloc*`.
 *
 * Strictly speaking this cast is not legal (it is undefined behaviour).
 * What is really wanted is a minimal constructor that zero-initialises the
 * global allocator, plus a second constructor for everything else.
 */
inline Alloc* get_GlobalPlaceHolder()
{
  return const_cast<Alloc*>(
    reinterpret_cast<const Alloc*>(&GlobalPlaceHolder));
}
/**
* Common aspects of the thread-local allocator: the per-thread `Alloc*` slot,
* the accessors used to read it, and the routine that releases it.
* Subclasses handle how releasing the allocator is triggered.
*/
class ThreadAllocCommon
{
// `init_thread_allocator` calls the protected `register_cleanup` and
// `inner_release` members (through `ThreadAlloc`), so it must be a friend.
friend void* init_thread_allocator(function_ref<void*(void*)>);
protected:
/**
* Thread-local flag that is set to true once `inner_release` has released
* this thread's allocator. If the allocator is reinitialised after that
* point, callers can stay on the slow path so that allocators are not
* leaked.
*
* This is required to allow the allocator to be called during destructors
* of other thread_local state.
*/
inline static thread_local bool destructor_has_run = false;
/**
* Releases this thread's allocator, if it has one. When the thread's slot
* holds anything other than the global placeholder, the allocator is handed
* back to `current_alloc_pool()`, `destructor_has_run` is set, and the slot
* is reset to the placeholder. Otherwise this does nothing.
*/
static inline void inner_release()
{
auto& per_thread = get_reference();
if (per_thread != get_GlobalPlaceHolder())
{
current_alloc_pool()->release(per_thread);
destructor_has_run = true;
per_thread = get_GlobalPlaceHolder();
}
}
/**
* Default clean up does nothing except arrange for statistics to be printed
* if USE_SNMALLOC_STATS is defined. Always returns false;
* `init_thread_allocator` releases the allocator immediately when a
* `register_cleanup` implementation returns true.
*/
static bool register_cleanup()
{
# ifdef USE_SNMALLOC_STATS
// NOTE(review): presumably `Singleton` invokes `atexit_print_stats` exactly
// once on first use - confirm against the `Singleton` definition.
Singleton<int, atexit_print_stats>::get();
# endif
return false;
}
# ifdef USE_SNMALLOC_STATS
/**
* Aggregates statistics from `current_alloc_pool()` into a `Stats` object
* and prints them to `std::cout`.
*/
static void print_stats()
{
Stats s;
current_alloc_pool()->aggregate_stats(s);
s.print<Alloc>(std::cout);
}
/**
* Registers `print_stats` with `atexit` and returns `atexit`'s result.
*/
static int atexit_print_stats() noexcept
{
return atexit(print_stats);
}
# endif
public:
/**
* Returns a reference to the allocator pointer for the current thread. This
* allows the caller to replace the current thread's allocator. The pointer
* initially refers to `GlobalPlaceHolder`.
*/
static inline Alloc*& get_reference()
{
// The cast is written out inline rather than calling
// `get_GlobalPlaceHolder()`: in this form codegen doesn't create a lazy
// initialisation for the thread_local.
static thread_local Alloc* alloc =
const_cast<Alloc*>(reinterpret_cast<const Alloc*>(&GlobalPlaceHolder));
return alloc;
}
/**
* Public interface, returns the current value of this thread's allocator
* pointer without initialising it.
*
* If no operations have been performed on an allocator returned by either
* `get()` or `get_noncachable()`, then the returned value will be an
* Alloc* that will always use the slow path.
*
* Only use this API if you intend to use the returned allocator just once
* per call, or if you know other calls have already been made to the
* allocator.
*/
static inline Alloc* get_noncachable()
{
return get_reference();
}
/**
* Public interface, returns the allocator for this thread, constructing
* one if necessary.
* This incurs a cost, so use `get_noncachable` if you can meet its
* criteria.
*
* When SNMALLOC_PASS_THROUGH is defined no initialisation is attempted and
* the thread's current pointer is returned as is.
*/
static SNMALLOC_FAST_PATH Alloc* get()
{
# ifdef SNMALLOC_PASS_THROUGH
return get_reference();
# else
// `alloc` is a reference to the thread's slot, so the value returned below
// reflects any replacement made by `init_thread_allocator`.
auto*& alloc = get_reference();
if (unlikely(needs_initialisation(alloc)) && !destructor_has_run)
{
// Call `init_thread_allocator` with a callback that does nothing, purely
// to perform the down call in case `register_cleanup` does more.
// During teardown for the destructor-based ThreadAlloc this will set
// alloc to GlobalPlaceHolder.
init_thread_allocator([](void*) { return nullptr; });
}
return alloc;
# endif
}
};
/**
* Version of the `ThreadAlloc` interface that uses a hook provided by libc
* to destroy thread-local state. This is the ideal option, because it
* enforces ordering of destruction such that the malloc state is destroyed
* after anything that can allocate memory.
*
* This class is used only when snmalloc is compiled as part of a compatible
* libc (for example, FreeBSD libc). It adds nothing to `ThreadAllocCommon`
* other than granting the libc hook access to it.
*/
class ThreadAllocLibcCleanup : public ThreadAllocCommon
{
/**
* Libc will call `_malloc_thread_cleanup` just before a thread terminates.
* That function calls the protected `inner_release` through this class to
* destroy the state, so it has to be a friend.
*/
friend void ::_malloc_thread_cleanup();
};
/**
 * `ThreadAlloc` flavour whose cleanup is driven by the destructor of a C++
 * `thread_local` object.  Should a per-thread allocator be used while other
 * per-thread data is being destroyed, this class creates a fresh instance
 * and registers its destructor again.  Cleanup therefore happens eventually,
 * although allocators may be handed back to the global pool and reacquired
 * several times along the way.
 *
 * Nothing beyond a working C++ environment is needed, which makes this the
 * simplest choice for initial bring-up on an unsupported platform.  It is
 * currently used in the FreeBSD kernel version.
 */
class ThreadAllocThreadDestructor : public ThreadAllocCommon
{
  template<void f()>
  friend class OnDestruct;

public:
  /**
   * Ensures the per-thread `OnDestruct` object bound to `inner_release`
   * exists, performs the common registration, and reports whether this
   * thread's allocator has already been released.
   */
  static bool register_cleanup()
  {
    // Constructed the first time this thread reaches this point.
    static thread_local OnDestruct<ThreadAllocCommon::inner_release>
      release_on_exit;

    // The base implementation's result is deliberately not used.
    static_cast<void>(ThreadAllocCommon::register_cleanup());

    const bool already_released = destructor_has_run;
    return already_released;
  }
};
# ifdef SNMALLOC_USE_THREAD_CLEANUP
/**
* Entry point that allows libc to call into the allocator for per-thread
* cleanup. It forwards to `ThreadAllocLibcCleanup::inner_release()`, which
* releases the calling thread's allocator if it has one.
*
* Only compiled when SNMALLOC_USE_THREAD_CLEANUP is defined.
*/
extern "C" void _malloc_thread_cleanup()
{
ThreadAllocLibcCleanup::inner_release();
}
using ThreadAlloc = ThreadAllocLibcCleanup;
# else
using ThreadAlloc = ThreadAllocThreadDestructor;
# endif
/**
 * Slow path for the placeholder replacement.
 * Function passed as a template parameter to `Allocator` to allow lazy
 * replacement.  It initialises the thread-local state if required and then
 * runs `f` on this thread's allocator.
 * The simple check that the allocator is the global placeholder is inlined;
 * everything else is only hit in a very unusual case and so should stay off
 * the fast path.
 *
 * Returns whatever `f` returns.  If `register_cleanup` reports that the
 * thread-local state has already been torn down, the allocator is released
 * again before returning.
 */
SNMALLOC_FAST_PATH void* init_thread_allocator(function_ref<void*(void*)> f)
{
  auto*& tls_slot = ThreadAlloc::get_reference();

  // A caller reusing the result of a noncachable call can arrive here with
  // an allocator that is already initialised.  Rather than raising an error
  // to discourage that, simply hand back the initialised one.
  const bool still_placeholder = (tls_slot == get_GlobalPlaceHolder());
  if (still_placeholder)
    tls_slot = current_alloc_pool()->acquire();

  auto outcome = f(tls_slot);

  // If the TLS destructor has already run for this thread, the allocator
  // must be given back straight away.
  const bool already_torn_down = ThreadAlloc::register_cleanup();
  if (already_torn_down)
  {
    ThreadAlloc::inner_release();
  }

  return outcome;
}
/**
 * Predicate handed to `Allocator` as a template parameter to support lazy
 * replacement.  Returns true when the allocator passed in is the
 * placeholder allocator, in which case `init_thread_allocator` should be
 * called.
 */
SNMALLOC_FAST_PATH bool needs_initialisation(void* existing)
{
  const void* const placeholder = get_GlobalPlaceHolder();
  return placeholder == existing;
}
#endif
} // namespace snmalloc