@@ -26,6 +26,10 @@ _Atomic(int) gc_n_threads_sweeping;
2626_Atomic(jl_gc_padded_page_stack_t * ) gc_allocd_scratch ;
2727// `tid` of mutator thread that triggered GC
2828_Atomic(int ) gc_master_tid ;
29+ // counter for sharing work when sweeping stacks: sweep_stack_pools() stores gc_n_threads - 1 here and gc_sweep_wait_for_all_stacks() spins while it is still >= 0
30+ _Atomic(int ) gc_ptls_sweep_idx ;
31+ // counter for round robin of giving back stack pages to the OS: sweep_stack_pools() advances it by one per call, wrapping to 0 when it would reach gc_n_threads
32+ _Atomic(int ) gc_stack_free_idx ;
2933// `tid` of first GC thread
3034int gc_first_tid ;
3135// Mutex/cond used to synchronize wakeup of GC threads on parallel marking
@@ -1525,6 +1529,44 @@ static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
15251529 gc_num .total_sweep_free_mallocd_memory_time += t_free_mallocd_memory_end - t_free_mallocd_memory_start ;
15261530}
15271531
1532+ // wake up the parallel collector threads to sweep the stacks: while holding gc_threads_lock, bump gc_stack_sweep_requested on each of them, then broadcast gc_threads_cond. `ptls` is not used in this body.
1533+ void gc_sweep_wake_all_stacks (jl_ptls_t ptls ) JL_NOTSAFEPOINT
1534+ {
1535+ uv_mutex_lock (& gc_threads_lock ); // requests are posted and the condition is signalled under this lock
1536+ int first = gc_first_parallel_collector_thread_id ();
1537+ int last = gc_last_parallel_collector_thread_id ();
1538+ for (int i = first ; i <= last ; i ++ ) { // inclusive range of parallel collector thread ids
1539+ jl_ptls_t ptls2 = gc_all_tls_states [i ];
1540+ gc_check_ptls_of_parallel_collector_thread (ptls2 );
1541+ jl_atomic_fetch_add (& ptls2 -> gc_tls .gc_stack_sweep_requested , 1 ); // post one stack-sweep request to this thread
1542+ }
1543+ uv_cond_broadcast (& gc_threads_cond ); // wake every thread currently waiting on gc_threads_cond
1544+ uv_mutex_unlock (& gc_threads_lock );
1545+ return ;
1546+ }
1547+
1548+ void gc_sweep_wait_for_all_stacks (void ) JL_NOTSAFEPOINT // spin until the stack sweep has finished
1549+ {
1550+ while ((jl_atomic_load_acquire (& gc_ptls_sweep_idx )>= 0 ) || jl_atomic_load_acquire (& gc_n_threads_sweeping ) != 0 ) { // keep waiting while the shared index is still non-negative or any thread is still counted as sweeping
1551+ jl_cpu_pause (); // busy-wait; no lock or condition variable is used here
1552+ }
1553+ }
1554+
1555+ void sweep_stack_pools (jl_ptls_t ptls ) JL_NOTSAFEPOINT // entry point for the stack sweep: set up the shared counters, wake the collector threads, sweep on this thread too, then wait for completion
1556+ {
1557+ // initialize ptls index for parallel sweeping of stack pools
1558+ assert (gc_n_threads );
1559+ int stack_free_idx = jl_atomic_load_relaxed (& gc_stack_free_idx ); // NOTE(review): separate relaxed load and store below, not an atomic read-modify-write; presumably only one thread runs this at a time, confirm
1560+ if (stack_free_idx + 1 == gc_n_threads ) // advance the round-robin index by one, wrapping to 0 at gc_n_threads
1561+ jl_atomic_store_relaxed (& gc_stack_free_idx , 0 );
1562+ else
1563+ jl_atomic_store_relaxed (& gc_stack_free_idx , stack_free_idx + 1 );
1564+ jl_atomic_store_release (& gc_ptls_sweep_idx , gc_n_threads - 1 ); // idx == gc_n_threads = release stacks to the OS so it's serial
1565+ gc_sweep_wake_all_stacks (ptls ); // post sweep requests to the parallel collector threads and signal them
1566+ sweep_stack_pool_loop (); // defined outside this view; presumably the calling thread takes part in the sweep as well, confirm
1567+ gc_sweep_wait_for_all_stacks (); // spin until gc_ptls_sweep_idx is negative and gc_n_threads_sweeping is zero
1568+ }
1569+
15281570static void gc_pool_sync_nfree (jl_gc_pagemeta_t * pg , jl_taggedvalue_t * last ) JL_NOTSAFEPOINT
15291571{
15301572 assert (pg -> fl_begin_offset != UINT16_MAX );
@@ -3604,7 +3646,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
36043646#endif
36053647 current_sweep_full = sweep_full ;
36063648 sweep_weak_refs ();
3607- sweep_stack_pools ();
3649+ sweep_stack_pools (ptls );
36083650 gc_sweep_foreign_objs ();
36093651 gc_sweep_other (ptls , sweep_full );
36103652 gc_scrub ();
0 commit comments