@@ -49,6 +49,14 @@ memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024;
 // finalizers
 // ---
 uint64_t finalizer_rngState[JL_RNG_SIZE];
+jl_mutex_t finalizers_lock;
+// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
+// If an object pointer has the lowest bit set, the next pointer is an unboxed C function pointer.
+// If an object pointer has the second-lowest bit set, the current pointer is a C object pointer.
+// It must be aligned to at least 4 bytes, and it is finalized immediately (at "quiescence").
+// `to_finalize` should not have tagged pointers.
+arraylist_t finalizer_list_marked;
+arraylist_t to_finalize;
 
 void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT;
 
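The tagging scheme described in the comment above works because these pointers are at least 4-byte aligned, which leaves the two low bits free for metadata. A minimal standalone sketch of that encoding; the helper names here are hypothetical stand-ins for the `gc_ptr_tag`/`gc_ptr_clear_tag` macros used later in this diff:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for gc_ptr_tag / gc_ptr_clear_tag:
 * read or strip metadata carried in the low bits of an aligned pointer. */
static uintptr_t ptr_tag(void *p, uintptr_t mask)   { return (uintptr_t)p & mask; }
static void *ptr_clear_tag(void *p, uintptr_t mask) { return (void*)((uintptr_t)p & ~mask); }

int main(void)
{
    static int obj;                         /* stands in for a jl_value_t */
    assert(((uintptr_t)&obj & 3) == 0);     /* requires >= 4-byte alignment */

    void *fptr_entry = (void*)((uintptr_t)&obj | 1); /* bit 0: unboxed C function finalizer */
    void *cobj_entry = (void*)((uintptr_t)&obj | 3); /* bit 1 too: C object, run at quiescence */

    printf("tags: %zu %zu\n", (size_t)ptr_tag(fptr_entry, 3), (size_t)ptr_tag(cobj_entry, 3));
    printf("round-trips: %d\n", ptr_clear_tag(cobj_entry, 3) == (void*)&obj);
    return 0;
}
```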
@@ -57,6 +65,25 @@ JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void)
     jl_rng_split(finalizer_rngState, jl_current_task->rngState);
 }
 
+// The first two entries are assumed to be empty and the rest are assumed to
+// be pointers to `jl_value_t` objects
+STATIC_INLINE void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT
+{
+    void **items = list->items;
+    items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2);
+    items[1] = ct->gcstack;
+    ct->gcstack = (jl_gcframe_t*)items;
+}
+
+STATIC_INLINE void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT
+{
+    arraylist_push(&to_finalize, o);
+    arraylist_push(&to_finalize, f);
+    // doesn't need a release store: the reader keeps checking until it sees
+    // the work, and releasing our lock provides the release barrier by then
+    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1);
+}
+
 void run_finalizer(jl_task_t *ct, void *o, void *ff)
 {
     int ptr_finalizer = gc_ptr_tag(o, 1);
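The relaxed store in `schedule_finalization` above deserves a closer look: the flag is only a hint, because any reader that acts on it must take `finalizers_lock` before touching `to_finalize`, and the lock's own acquire/release ordering is what makes the queued work visible. A compressed pthread/C11 model of that handoff, with invented names:

```c
#include <pthread.h>
#include <stdatomic.h>

/* The flag may be read early or late without harm: a reader that acts on
 * it must take the lock first, and the lock orders the actual work items. */
static pthread_mutex_t work_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int have_pending;
static int pending_items;              /* protected by work_lock */

void writer_schedule(void)             /* analogous to schedule_finalization */
{
    pthread_mutex_lock(&work_lock);
    pending_items++;                   /* queue work under the lock */
    atomic_store_explicit(&have_pending, 1, memory_order_relaxed);
    pthread_mutex_unlock(&work_lock);  /* the release barrier happens here */
}

void reader_poll(void)                 /* analogous to a pending-finalizer check */
{
    if (!atomic_load_explicit(&have_pending, memory_order_relaxed))
        return;                        /* cheap racy check; may miss once */
    pthread_mutex_lock(&work_lock);    /* acquire: queued work is now visible */
    pending_items = 0;                 /* drain it */
    atomic_store_explicit(&have_pending, 0, memory_order_relaxed);
    pthread_mutex_unlock(&work_lock);
}
```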
@@ -79,6 +106,243 @@ void run_finalizer(jl_task_t *ct, void *o, void *ff)
     }
 }
 
+void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
+{
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
+    arraylist_t *a = &ptls->finalizers;
+    // This acquire load and the release store at the end are used to
+    // synchronize with `finalize_object` on another thread. Apart from the GC,
+    // which is blocked while we are in an unsafe region, at most one other
+    // thread can be accessing our list in `finalize_object`
+    // (only one, since it needs to acquire the finalizer lock).
+    // As in `finalize_object`, all content mutation has to be done
+    // between the acquire and the release of the length.
+    size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len);
+    if (__unlikely(oldlen + 2 > a->max)) {
+        JL_LOCK_NOGC(&finalizers_lock);
+        // `a->len` might have been modified.
+        // Another possibility is to always grow the array to `oldlen + 2` but
+        // it's simpler this way and uses slightly less memory =)
+        oldlen = a->len;
+        arraylist_grow(a, 2);
+        a->len = oldlen;
+        JL_UNLOCK_NOGC(&finalizers_lock);
+    }
+    void **items = a->items;
+    items[oldlen] = v;
+    items[oldlen + 1] = f;
+    jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2);
+}
+
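The length-publication protocol in `jl_gc_add_finalizer_` can be modeled in a few lines of C11 atomics: the owner initializes the two new slots first, then release-stores the new length, so a reader that acquire-loads the length is guaranteed that every index below the value it read refers to fully written slots. A sketch under that assumption (fixed capacity, growth elided; names invented):

```c
#include <stdatomic.h>
#include <stddef.h>

/* Single-producer publication, modeled on jl_gc_add_finalizer_. */
#define CAP 64
static void *slots[CAP];
static atomic_size_t list_len;

void owner_append(void *v, void *f)
{
    size_t n = atomic_load_explicit(&list_len, memory_order_acquire);
    /* (growth under the lock is elided; see the diff above) */
    slots[n] = v;                      /* write both slots first... */
    slots[n + 1] = f;
    /* ...then publish: everything below n + 2 is now initialized */
    atomic_store_explicit(&list_len, n + 2, memory_order_release);
}

size_t reader_snapshot(void)
{
    /* acquire: pairs with the release store in owner_append */
    return atomic_load_explicit(&list_len, memory_order_acquire);
}
```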
+// Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock
+// to be held by the current thread; the lock is released before the
+// function returns.
+void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT_LEAVE
+{
+    // Avoid marking `ct` as non-migratable via an `@async` task in a finalizer
+    // (as noted in the docstring of `finalizer`):
+    uint8_t sticky = ct->sticky;
+    // empty out the first two entries for the GC frame
+    arraylist_push(list, list->items[0]);
+    arraylist_push(list, list->items[1]);
+    jl_gc_push_arraylist(ct, list);
+    void **items = list->items;
+    size_t len = list->len;
+    JL_UNLOCK_NOGC(&finalizers_lock);
+    // run finalizers in the reverse of the order they were added, so lower-level finalizers run last
+    for (size_t i = len - 4; i >= 2; i -= 2)
+        run_finalizer(ct, items[i], items[i + 1]);
+    // the first entries were moved to the end to make room for the GC frame metadata
+    run_finalizer(ct, items[len - 2], items[len - 1]);
+    // matches the jl_gc_push_arraylist above
+    JL_GC_POP();
+    ct->sticky = sticky;
+}
+
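The index arithmetic in `jl_gc_run_finalizers_in_list` is easy to misread: the first (object, finalizer) pair is copied to the tail so that slots 0 and 1 can hold the GC frame words, pairs then run from the tail backwards, and the relocated first pair runs last. A toy model of just that rotation:

```c
#include <stdio.h>

int main(void)
{
    /* pairs stored flat as (obj, fn, obj, fn, ...); two spare tail slots */
    const char *items[8] = {"o1", "f1", "o2", "f2", "o3", "f3", 0, 0};
    size_t len = 6;

    /* copy the first pair to the tail, as the real code does, so slots
     * 0-1 can be overwritten with the GC frame words */
    items[len] = items[0];
    items[len + 1] = items[1];
    len += 2;
    items[0] = "<gcframe>";
    items[1] = "<prev gcstack>";

    for (size_t i = len - 4; i >= 2; i -= 2)   /* (o3,f3), then (o2,f2) */
        printf("run %s(%s)\n", items[i + 1], items[i]);
    printf("run %s(%s)\n", items[len - 1], items[len - 2]); /* (o1,f1) last */
    return 0;
}
```

This prints `f3(o3)`, `f2(o2)`, `f1(o1)`: reverse order of addition, with the first-added (lowest-level) finalizer running last.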
+void run_finalizers(jl_task_t *ct)
+{
+    // Racy fast path:
+    // The race here should be OK since it can only happen if
+    // another thread is writing to the list with the lock held. In that case,
+    // we don't need to run the pending finalizers since the writer thread
+    // will flush them itself.
+    if (to_finalize.len == 0)
+        return;
+    JL_LOCK_NOGC(&finalizers_lock);
+    if (to_finalize.len == 0) {
+        JL_UNLOCK_NOGC(&finalizers_lock);
+        return;
+    }
+    arraylist_t copied_list;
+    memcpy(&copied_list, &to_finalize, sizeof(copied_list));
+    if (to_finalize.items == to_finalize._space) {
+        copied_list.items = copied_list._space;
+    }
+    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0);
+    arraylist_new(&to_finalize, 0);
+
+    uint64_t save_rngState[JL_RNG_SIZE];
+    memcpy(&save_rngState[0], &ct->rngState[0], sizeof(save_rngState));
+    jl_rng_split(ct->rngState, finalizer_rngState);
+
+    // This releases the finalizers lock.
+    int8_t was_in_finalizer = ct->ptls->in_finalizer;
+    ct->ptls->in_finalizer = 1;
+    jl_gc_run_finalizers_in_list(ct, &copied_list);
+    ct->ptls->in_finalizer = was_in_finalizer;
+    arraylist_free(&copied_list);
+
+    memcpy(&ct->rngState[0], &save_rngState[0], sizeof(save_rngState));
+}
+
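One subtlety in `run_finalizers` is the `_space` check after the `memcpy`: julia's `arraylist_t` keeps short lists in an inline buffer inside the struct itself, so a raw struct copy leaves `items` pointing into the *source* struct and must be patched to point at the copy's own buffer. A toy reproduction of that fixup, with a simplified list type:

```c
#include <stdio.h>
#include <string.h>

/* Toy arraylist with inline storage, loosely mirroring arraylist_t:
 * short lists keep their elements in the struct's own _space buffer. */
typedef struct {
    size_t len;
    void **items;      /* either heap-allocated or == _space */
    void *_space[4];
} toylist_t;

int main(void)
{
    toylist_t src = {0};
    src.items = src._space;    /* short list: inline storage in use */
    src.len = 2;

    toylist_t dst;
    memcpy(&dst, &src, sizeof(dst));
    /* after a raw struct copy, dst.items still points into src's buffer;
     * this is the fixup run_finalizers performs on its copied_list */
    if (src.items == src._space)
        dst.items = dst._space;

    printf("items points into the copy: %d\n", dst.items == dst._space);
    return 0;
}
```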
+// if `need_sync` is true, `list` is the `finalizers` list of another
+// thread and additional synchronization is needed
+void finalize_object(arraylist_t *list, jl_value_t *o,
+                     arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT
+{
+    // The acquire load makes sure that the first `len` objects are valid.
+    // If `need_sync` is true, all mutation of the content should be limited
+    // to the first `oldlen` elements and no mutation is allowed after the
+    // new length is published with the `cmpxchg` at the end of the function.
+    // This way, the mutation cannot conflict with the owning thread,
+    // which only writes to locations later than `len`
+    // and will not resize the buffer without acquiring the lock.
+    size_t len = need_sync ? jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len;
+    size_t oldlen = len;
+    void **items = list->items;
+    size_t j = 0;
+    for (size_t i = 0; i < len; i += 2) {
+        void *v = items[i];
+        int move = 0;
+        if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) {
+            void *f = items[i + 1];
+            move = 1;
+            arraylist_push(copied_list, v);
+            arraylist_push(copied_list, f);
+        }
+        if (move || __unlikely(!v)) {
+            // remove item
+        }
+        else {
+            if (j < i) {
+                items[j] = items[i];
+                items[j + 1] = items[i + 1];
+            }
+            j += 2;
+        }
+    }
+    len = j;
+    if (oldlen == len)
+        return;
+    if (need_sync) {
+        // The memset needs to be unconditional since the owning thread might
+        // have already read the length.
+        // The `memset` (like any other content mutation) has to be done
+        // **before** the `cmpxchg` that publishes the length.
+        memset(&items[len], 0, (oldlen - len) * sizeof(void*));
+        jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len);
+    }
+    else {
+        list->len = len;
+    }
+}
+
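The loop in `finalize_object` is an in-place compaction over flat (object, finalizer) pairs: matching or already-cleared entries are dropped (matches are also pushed onto `copied_list`, elided here) and survivors are shifted down, after which only the new length is published. A stripped-down model of the compaction itself, with made-up pointer values:

```c
#include <stdio.h>

int main(void)
{
    void *target = (void*)0x10;
    void *items[8] = {(void*)0x10, (void*)0x1,   /* matches target: drop */
                      (void*)0x20, (void*)0x2,   /* keep */
                      0,           0,            /* already cleared: drop */
                      (void*)0x30, (void*)0x3};  /* keep, shifted down */
    size_t len = 8, j = 0;
    for (size_t i = 0; i < len; i += 2) {
        void *v = items[i];
        if (v == target || v == 0)
            continue;                 /* drop: matched or cleared slot */
        if (j < i) {                  /* shift the surviving pair down */
            items[j] = items[i];
            items[j + 1] = items[i + 1];
        }
        j += 2;
    }
    len = j;                          /* 4: only the 0x20 and 0x30 pairs remain */
    printf("new len = %zu\n", len);
    return 0;
}
```

Because survivors only ever move to lower indices and the new, smaller length is published last, a concurrent reader using the old length sees either the original pair or a zeroed slot, never a half-written one.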
+JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
+{
+    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f);
+}
+
+// schedule f(v) to be called at the next quiescent interval (i.e. after the next safepoint/region on all threads)
+JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT
+{
+    assert(!gc_ptr_tag(v, 3));
+    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f);
+}
+
+JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT
+{
+    if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) {
+        jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f));
+    }
+    else {
+        jl_gc_add_finalizer_(ptls, v, f);
+    }
+}
+
+JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct)
+{
+    if (ct == NULL)
+        ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) {
+        run_finalizers(ct);
+    }
+}
+
+JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
+{
+    JL_LOCK_NOGC(&finalizers_lock);
+    // Copy the finalizers into a temporary list so that code in the finalizer
+    // won't change the list as we loop through it.
+    // The list is also used as the GC frame while the finalizers run.
+    arraylist_t copied_list;
+    arraylist_new(&copied_list, 0);
+    // No need to check the to_finalize list since the user is apparently
+    // still holding a reference to the object.
+    int gc_n_threads;
+    jl_ptls_t *gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 != NULL)
+            finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i);
+    }
+    finalize_object(&finalizer_list_marked, o, &copied_list, 0);
+    gc_n_threads = 0;
+    gc_all_tls_states = NULL;
+    if (copied_list.len > 0) {
+        // This releases the finalizers lock.
+        jl_gc_run_finalizers_in_list(ct, &copied_list);
+    }
+    else {
+        JL_UNLOCK_NOGC(&finalizers_lock);
+    }
+    arraylist_free(&copied_list);
+}
+
+void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT
+{
+    void **items = flist->items;
+    size_t len = flist->len;
+    for (size_t i = 0; i < len; i += 2) {
+        void *v = items[i];
+        void *f = items[i + 1];
+        if (__unlikely(!v))
+            continue;
+        schedule_finalization(v, f);
+    }
+    flist->len = 0;
+}
+
+void jl_gc_run_all_finalizers(jl_task_t *ct)
+{
+    if (!ct) return;
+    int gc_n_threads;
+    jl_ptls_t *gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    schedule_all_finalizers(&finalizer_list_marked);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 != NULL)
+            schedule_all_finalizers(&ptls2->finalizers);
+    }
+    gc_n_threads = 0;
+    gc_all_tls_states = NULL;
+    run_finalizers(ct);
+}
+
 JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
 {
     if (ptls == NULL)