Skip to content

Commit f690aa3

Browse files
authored
Use Julia's finalizer implementation (#22)
This PR moves code about registering and running finalizers to `gc-common`.
1 parent 67c5c32 commit f690aa3

File tree

4 files changed

+274
-303
lines changed

4 files changed

+274
-303
lines changed

src/gc-common.c

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,14 @@ memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024;
4949
// finalizers
// ---
// RNG stream dedicated to finalizers, split off a task's stream so that
// running finalizers does not perturb user-visible RNG state (see
// run_finalizers, which saves/restores ct->rngState around it).
uint64_t finalizer_rngState[JL_RNG_SIZE];
// Serializes cross-thread finalizer bookkeeping: growing `ptls->finalizers`,
// and draining `to_finalize` / `finalizer_list_marked`.
jl_mutex_t finalizers_lock;
// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
// If an object pointer has the second lowest bit set, the current pointer is a c object pointer.
// It must be aligned at least 4, and it is finalized immediately (at "quiescence").
// `to_finalize` should not have tagged pointers.
arraylist_t finalizer_list_marked;
arraylist_t to_finalize;

void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT;
5462

@@ -57,6 +65,25 @@ JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void)
5765
jl_rng_split(finalizer_rngState, jl_current_task->rngState);
5866
}
5967

68+
// The first two entries are assumed to be empty and the rest are assumed to
// be pointers to `jl_value_t` objects.
// Rooting trick: reuse the arraylist's own storage as a GC frame so the
// objects it holds stay alive while their finalizers run.  items[0] becomes
// the encoded frame header, items[1] the link to the previous frame; the
// caller undoes this with a plain JL_GC_POP().
STATIC_INLINE void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT
{
    void **items = list->items;
    // rooted slot count excludes the two metadata entries
    items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2);
    items[1] = ct->gcstack;
    ct->gcstack = (jl_gcframe_t*)items;
}
77+
78+
// Queue the (object, finalizer) pair onto the global `to_finalize` list and
// raise the "pending finalizers" flag.
// NOTE(review): mutates the shared `to_finalize` list non-atomically, so the
// caller presumably holds `finalizers_lock` — confirm against callers
// (schedule_all_finalizers' callers hold it in this file's siblings).
STATIC_INLINE void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT
{
    arraylist_push(&to_finalize, o);
    arraylist_push(&to_finalize, f);
    // doesn't need release, since we'll keep checking (on the reader) until we see the work and
    // release our lock, and that will have a release barrier by then
    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1);
}
86+
6087
void run_finalizer(jl_task_t *ct, void *o, void *ff)
6188
{
6289
int ptr_finalizer = gc_ptr_tag(o, 1);
@@ -79,6 +106,243 @@ void run_finalizer(jl_task_t *ct, void *o, void *ff)
79106
}
80107
}
81108

109+
// Append the (value, finalizer) pair to this thread's `ptls->finalizers`
// list.  `v` may carry the tag bits documented at the top of this file.
// Lock-free in the common case: the lock is only taken to grow the buffer.
// Synchronizes with at most one concurrent reader (`finalize_object` on
// another thread) through acquire/release on `a->len` — see below.
void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
{
    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
    arraylist_t *a = &ptls->finalizers;
    // This acquire load and the release store at the end are used to
    // synchronize with `finalize_object` on another thread. Apart from the GC,
    // which is blocked by entering a unsafe region, there might be only
    // one other thread accessing our list in `finalize_object`
    // (only one thread since it needs to acquire the finalizer lock).
    // Similar to `finalize_object`, all content mutation has to be done
    // between the acquire and the release of the length.
    size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len);
    if (__unlikely(oldlen + 2 > a->max)) {
        JL_LOCK_NOGC(&finalizers_lock);
        // `a->len` might have been modified.
        // Another possibility is to always grow the array to `oldlen + 2` but
        // it's simpler this way and uses slightly less memory =)
        oldlen = a->len;
        arraylist_grow(a, 2);
        // only the capacity grew; keep the published length unchanged until
        // the new entries are actually written
        a->len = oldlen;
        JL_UNLOCK_NOGC(&finalizers_lock);
    }
    void **items = a->items;
    items[oldlen] = v;
    items[oldlen + 1] = f;
    // publish the two new entries to any concurrent `finalize_object`
    jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2);
}
136+
137+
// Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock
// to be held for the current thread and will release the lock when the
// function returns.
void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT_LEAVE
{
    // Avoid marking `ct` as non-migratable via an `@async` task (as noted in the docstring
    // of `finalizer`) in a finalizer:
    uint8_t sticky = ct->sticky;
    // empty out the first two entries for the GC frame by re-pushing them at
    // the end of the list; jl_gc_push_arraylist overwrites slots 0 and 1
    arraylist_push(list, list->items[0]);
    arraylist_push(list, list->items[1]);
    jl_gc_push_arraylist(ct, list);
    void **items = list->items;
    size_t len = list->len;
    // safe to drop the lock now: the entries are rooted via the GC frame
    JL_UNLOCK_NOGC(&finalizers_lock);
    // run finalizers in reverse order they were added, so lower-level finalizers run last
    // (i counts down from len-4 to 2 in steps of 2; size_t never underflows
    // here because i stays even and the loop exits at i == 0)
    for (size_t i = len-4; i >= 2; i -= 2)
        run_finalizer(ct, items[i], items[i + 1]);
    // first entries were moved last to make room for GC frame metadata
    run_finalizer(ct, items[len-2], items[len-1]);
    // matches the jl_gc_push_arraylist above
    JL_GC_POP();
    ct->sticky = sticky;
}
161+
162+
// Drain the global `to_finalize` queue, running every scheduled finalizer on
// task `ct`.  The queue is detached (by struct copy) under the lock so that
// finalizers which schedule further finalizers cannot invalidate the
// iteration, and `ct`'s RNG state is swapped to the dedicated finalizer
// stream for the duration so user code's RNG sequence is unaffected.
void run_finalizers(jl_task_t *ct)
{
    // Racy fast path:
    // The race here should be OK since the race can only happen if
    // another thread is writing to it with the lock held. In such case,
    // we don't need to run pending finalizers since the writer thread
    // will flush it.
    if (to_finalize.len == 0)
        return;
    JL_LOCK_NOGC(&finalizers_lock);
    // re-check under the lock: another thread may have drained it already
    if (to_finalize.len == 0) {
        JL_UNLOCK_NOGC(&finalizers_lock);
        return;
    }
    arraylist_t copied_list;
    memcpy(&copied_list, &to_finalize, sizeof(copied_list));
    if (to_finalize.items == to_finalize._space) {
        // small-list inline storage: the copy must point at its own
        // `_space`, not at the original struct's
        copied_list.items = copied_list._space;
    }
    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0);
    arraylist_new(&to_finalize, 0);

    uint64_t save_rngState[JL_RNG_SIZE];
    memcpy(&save_rngState[0], &ct->rngState[0], sizeof(save_rngState));
    jl_rng_split(ct->rngState, finalizer_rngState);

    // This releases the finalizers lock.
    int8_t was_in_finalizer = ct->ptls->in_finalizer;
    ct->ptls->in_finalizer = 1;
    jl_gc_run_finalizers_in_list(ct, &copied_list);
    ct->ptls->in_finalizer = was_in_finalizer;
    arraylist_free(&copied_list);

    memcpy(&ct->rngState[0], &save_rngState[0], sizeof(save_rngState));
}
197+
198+
// Move every finalizer registered for object `o` out of `list` and onto
// `copied_list`, compacting `list` in place.
// if `need_sync` is true, the `list` is the `finalizers` list of another
// thread and we need additional synchronizations
void finalize_object(arraylist_t *list, jl_value_t *o,
                     arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT
{
    // The acquire load makes sure that the first `len` objects are valid.
    // If `need_sync` is true, all mutations of the content should be limited
    // to the first `oldlen` elements and no mutation is allowed after the
    // new length is published with the `cmpxchg` at the end of the function.
    // This way, the mutation should not conflict with the owning thread,
    // which only writes to locations later than `len`
    // and will not resize the buffer without acquiring the lock.
    size_t len = need_sync ? jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len;
    size_t oldlen = len;
    void **items = list->items;
    size_t j = 0;
    for (size_t i = 0; i < len; i += 2) {
        void *v = items[i];
        int move = 0;
        // strip tag bit 0 (unboxed-C-function marker) before comparing, but
        // push the still-tagged pointer so the runner knows the kind
        if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) {
            void *f = items[i + 1];
            move = 1;
            arraylist_push(copied_list, v);
            arraylist_push(copied_list, f);
        }
        if (move || __unlikely(!v)) {
            // remove item (moved-out or already-NULLed slot)
        }
        else {
            // compact surviving entries toward the front
            if (j < i) {
                items[j] = items[i];
                items[j+1] = items[i+1];
            }
            j += 2;
        }
    }
    len = j;
    if (oldlen == len)
        return;
    if (need_sync) {
        // The memset needs to be unconditional since the thread might have
        // already read the length.
        // The `memset` (like any other content mutation) has to be done
        // **before** the `cmpxchg` which publishes the length.
        memset(&items[len], 0, (oldlen - len) * sizeof(void*));
        // cmpxchg, not a plain store: if the owning thread appended entries
        // since our acquire load, its longer length must win and the NULLed
        // slots above are skipped by readers instead
        jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len);
    }
    else {
        list->len = len;
    }
}
249+
250+
// Register `f` as a raw C-function finalizer for `v`.  Setting tag bit 0 on
// the object pointer marks the paired entry as an unboxed function pointer
// (see the tagging rules above `finalizer_list_marked`).
JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
{
    uintptr_t tagged = (uintptr_t)v | 1;
    jl_gc_add_finalizer_(ptls, (void*)tagged, f);
}
254+
255+
// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads)
256+
JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT
257+
{
258+
assert(!gc_ptr_tag(v, 3));
259+
jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f);
260+
}
261+
262+
// Register finalizer `f` for value `v` on thread state `ptls`.
// A `Ptr{Cvoid}` finalizer is unwrapped and stored as a raw C function
// pointer; any other callable is stored as a Julia object.
JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT
{
    if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) {
        jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f));
        return;
    }
    jl_gc_add_finalizer_(ptls, v, f);
}
271+
272+
// Run queued finalizers on `ct` (defaulting to the current task), unless it
// is unsafe to do so right now: already inside a finalizer, holding locks,
// or finalizers explicitly inhibited.
JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct)
{
    if (ct == NULL)
        ct = jl_current_task;
    jl_ptls_t ptls = ct->ptls;
    if (ptls->in_finalizer)
        return;
    if (ptls->locks.len != 0 || ptls->finalizers_inhibited != 0)
        return;
    run_finalizers(ct);
}
281+
282+
// Eagerly run every finalizer registered for object `o`, collecting matches
// from every thread's `finalizers` list and from `finalizer_list_marked`.
JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
{
    JL_LOCK_NOGC(&finalizers_lock);
    // Copy the finalizers into a temporary list so that code in the finalizer
    // won't change the list as we loop through them.
    // This list is also used as the GC frame when we are running the finalizers
    arraylist_t copied_list;
    arraylist_new(&copied_list, 0);
    // No need to check the to_finalize list since the user is apparently
    // still holding a reference to the object
    int gc_n_threads;
    jl_ptls_t* gc_all_tls_states;
    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
    for (int i = 0; i < gc_n_threads; i++) {
        jl_ptls_t ptls2 = gc_all_tls_states[i];
        if (ptls2 != NULL)
            // need_sync only for lists owned by other threads
            finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i);
    }
    finalize_object(&finalizer_list_marked, o, &copied_list, 0);
    gc_n_threads = 0;
    gc_all_tls_states = NULL;
    if (copied_list.len > 0) {
        // This releases the finalizers lock.
        jl_gc_run_finalizers_in_list(ct, &copied_list);
    }
    else {
        JL_UNLOCK_NOGC(&finalizers_lock);
    }
    arraylist_free(&copied_list);
}
313+
314+
// Move every (object, finalizer) pair from `flist` onto the global
// `to_finalize` queue, then empty `flist`.  Slots whose object pointer was
// NULLed out by `finalize_object` are skipped.
void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT
{
    void **entries = flist->items;
    size_t count = flist->len;
    for (size_t idx = 0; idx < count; idx += 2) {
        void *obj = entries[idx];
        void *fin = entries[idx + 1];
        if (__unlikely(obj == NULL))
            continue;
        schedule_finalization(obj, fin);
    }
    flist->len = 0;
}
327+
328+
// Schedule every registered finalizer (from every thread's list and from
// `finalizer_list_marked`) onto `to_finalize`, then run them all on `ct`.
// NOTE(review): presumably invoked at process exit — confirm with callers.
// No-op when `ct` is NULL.
void jl_gc_run_all_finalizers(jl_task_t *ct)
{
    if (!ct) return;
    int gc_n_threads;
    jl_ptls_t* gc_all_tls_states;
    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
    schedule_all_finalizers(&finalizer_list_marked);
    for (int i = 0; i < gc_n_threads; i++) {
        jl_ptls_t ptls2 = gc_all_tls_states[i];
        if (ptls2 != NULL)
            schedule_all_finalizers(&ptls2->finalizers);
    }
    gc_n_threads = 0;
    gc_all_tls_states = NULL;
    run_finalizers(ct);
}
345+
82346
JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
83347
{
84348
if (ptls == NULL)

0 commit comments

Comments
 (0)