@@ -21,11 +21,17 @@ int jl_n_sweepthreads;
2121// Number of threads currently running the GC mark-loop
2222_Atomic(int ) gc_n_threads_marking ;
2323// Number of threads sweeping pools (incremented on entry to and decremented on exit from `gc_sweep_pool_parallel`)
24- _Atomic(int ) gc_n_threads_sweeping ;
24+ _Atomic(int ) gc_n_threads_sweeping_pools ;
25+ // Number of threads sweeping stacks (`gc_sweep_wait_for_all_stacks` spins until this reads 0)
26+ _Atomic(int ) gc_n_threads_sweeping_stacks ;
2527// Temporary for the `ptls->gc_tls.page_metadata_allocd` used during parallel sweeping (padded to avoid false sharing)
2628_Atomic(jl_gc_padded_page_stack_t * ) gc_allocd_scratch ;
2729// `tid` of mutator thread that triggered GC
2830_Atomic(int ) gc_master_tid ;
31+ // counter for sharing work when sweeping stacks; `sweep_stack_pools` seeds it with `gc_n_threads - 1` and `gc_sweep_wait_for_all_stacks` spins while it is still >= 0
32+ _Atomic(int ) gc_ptls_sweep_idx ;
33+ // counter for round robin of giving back stack pages to the OS; each `sweep_stack_pools` call advances it by one, wrapping to 0 when it would reach `gc_n_threads`
34+ _Atomic(int ) gc_stack_free_idx ;
2935// `tid` of first GC thread
3036int gc_first_tid ;
3137// Mutex/cond used to synchronize wakeup of GC threads on parallel marking
@@ -1525,6 +1531,44 @@ static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
15251531 gc_num .total_sweep_free_mallocd_memory_time += t_free_mallocd_memory_end - t_free_mallocd_memory_start ;
15261532}
15271533
1534+ // wake up all threads to sweep the stacks: while holding `gc_threads_lock`, bump `gc_stack_sweep_requested` on every parallel collector thread, then broadcast on `gc_threads_cond`
1535+ void gc_sweep_wake_all_stacks (jl_ptls_t ptls ) JL_NOTSAFEPOINT // NOTE(review): `ptls` is not referenced anywhere in this body
1536+ {
1537+ uv_mutex_lock (& gc_threads_lock ); // the requests below and the broadcast are all issued under this lock
1538+ int first = gc_first_parallel_collector_thread_id ();
1539+ int last = gc_last_parallel_collector_thread_id ();
1540+ for (int i = first ; i <= last ; i ++ ) { // inclusive range: `last` is itself a collector thread id
1541+ jl_ptls_t ptls2 = gc_all_tls_states [i ];
1542+ gc_check_ptls_of_parallel_collector_thread (ptls2 );
1543+ jl_atomic_fetch_add (& ptls2 -> gc_tls .gc_stack_sweep_requested , 1 ); // per-thread request counter; presumably read by that thread's wait loop -- TODO confirm
1544+ }
1545+ uv_cond_broadcast (& gc_threads_cond );
1546+ uv_mutex_unlock (& gc_threads_lock );
1547+ return ; // redundant at the end of a void function
1548+ }
1549+
1550+ void gc_sweep_wait_for_all_stacks (void ) JL_NOTSAFEPOINT // busy-wait until `gc_ptls_sweep_idx` is negative AND `gc_n_threads_sweeping_stacks` is zero
1551+ {
1552+ while ((jl_atomic_load_acquire (& gc_ptls_sweep_idx ) >= 0 ) || jl_atomic_load_acquire (& gc_n_threads_sweeping_stacks ) != 0 ) { // a non-negative index or a non-zero sweeper count keeps us spinning; presumably sweepers decrement the index as they claim work -- TODO confirm against sweep_stack_pool_loop
1553+ jl_cpu_pause ();
1554+ }
1555+ }
1556+
1557+ void sweep_stack_pools (jl_ptls_t ptls ) JL_NOTSAFEPOINT // advance the round-robin free index, seed the shared sweep index, wake the collector threads, sweep on this thread too, then wait for completion
1558+ {
1559+ // initialize ptls index for parallel sweeping of stack pools
1560+ assert (gc_n_threads ); // both the wrap-around check and the index seed below depend on gc_n_threads being non-zero
1561+ int stack_free_idx = jl_atomic_load_relaxed (& gc_stack_free_idx ); // NOTE(review): this load and the store below are not one atomic read-modify-write; presumably only one thread runs this function at a time -- confirm
1562+ if (stack_free_idx + 1 == gc_n_threads ) // wrap back to 0 instead of reaching gc_n_threads
1563+ jl_atomic_store_relaxed (& gc_stack_free_idx , 0 );
1564+ else
1565+ jl_atomic_store_relaxed (& gc_stack_free_idx , stack_free_idx + 1 );
1566+ jl_atomic_store_release (& gc_ptls_sweep_idx , gc_n_threads - 1 ); // idx == gc_n_threads = release stacks to the OS so it's serial -- NOTE(review): the value stored here is gc_n_threads - 1, not gc_n_threads; confirm what this remark refers to
1567+ gc_sweep_wake_all_stacks (ptls ); // the index is published before any thread is woken
1568+ sweep_stack_pool_loop (); // the calling thread takes part in the sweep as well
1569+ gc_sweep_wait_for_all_stacks (); // spins until the shared index is negative and no thread is counted as sweeping stacks
1570+ }
1571+
15281572static void gc_pool_sync_nfree (jl_gc_pagemeta_t * pg , jl_taggedvalue_t * last ) JL_NOTSAFEPOINT
15291573{
15301574 assert (pg -> fl_begin_offset != UINT16_MAX );
@@ -1639,15 +1683,15 @@ void gc_sweep_wake_all(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_
16391683void gc_sweep_wait_for_all (void ) // clear the pool-sweep scratch pointer, then busy-wait until no thread is counted as sweeping pools
16401684{
16411685 jl_atomic_store (& gc_allocd_scratch , NULL ); // `gc_sweep_pool_parallel` only enters its sweeping branch when it loads a non-NULL value here
1642- while (jl_atomic_load_relaxed ( & gc_n_threads_sweeping ) != 0 ) {
1686+ while (jl_atomic_load_acquire ( & gc_n_threads_sweeping_pools ) != 0 ) { // the counter is raised on entry to `gc_sweep_pool_parallel` and lowered on exit
16431687 jl_cpu_pause ();
16441688 }
16451689}
16461690
16471691// sweep all pools
16481692void gc_sweep_pool_parallel (jl_ptls_t ptls )
16491693{
1650- jl_atomic_fetch_add (& gc_n_threads_sweeping , 1 );
1694+ jl_atomic_fetch_add (& gc_n_threads_sweeping_pools , 1 );
16511695 jl_gc_padded_page_stack_t * allocd_scratch = jl_atomic_load (& gc_allocd_scratch );
16521696 if (allocd_scratch != NULL ) {
16531697 gc_page_profiler_serializer_t serializer = gc_page_serializer_create ();
@@ -1692,7 +1736,7 @@ void gc_sweep_pool_parallel(jl_ptls_t ptls)
16921736 }
16931737 gc_page_serializer_destroy (& serializer );
16941738 }
1695- jl_atomic_fetch_add (& gc_n_threads_sweeping , -1 );
1739+ jl_atomic_fetch_add (& gc_n_threads_sweeping_pools , -1 );
16961740}
16971741
16981742// free all pages (i.e. through `madvise` on Linux) that were lazily freed
@@ -3604,7 +3648,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
36043648#endif
36053649 current_sweep_full = sweep_full ;
36063650 sweep_weak_refs ();
3607- sweep_stack_pools ();
3651+ sweep_stack_pools (ptls );
36083652 gc_sweep_foreign_objs ();
36093653 gc_sweep_other (ptls , sweep_full );
36103654 gc_scrub ();
0 commit comments