@@ -895,7 +895,13 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
895895 }
896896 objprofile_count (jl_typeof (jl_valueof (o )),
897897 mark_mode == GC_OLD_MARKED , page -> osize );
898- page -> has_marked = 1 ;
898+ // Test-exchange on `has_marked` to avoid too many strong atomic ops
899+ _Atomic(uint8_t ) * phas_marked = (_Atomic (uint8_t ) * )& page -> has_marked ;
900+ if (jl_atomic_load_relaxed (phas_marked ) == 0 ) {
901+ if (jl_atomic_exchange (phas_marked , 1 ) == 0 ) {
902+ jl_atomic_fetch_add (& gc_heap_stats .marked_pages , 1 );
903+ }
904+ }
899905#endif
900906}
901907
@@ -1520,8 +1526,9 @@ static void gc_sweep_page(jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_pag
15201526 }
15211527 }
15221528 gc_time_count_page (freedall , pg_skpd );
1529+ jl_atomic_fetch_add (& gc_heap_stats .marked_pages , pg -> has_marked );
1530+ jl_atomic_fetch_add ((_Atomic (int64_t ) * )& pool_live_bytes , GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize );
15231531 jl_atomic_fetch_add ((_Atomic (int64_t ) * )& gc_num .freed , (nfree - old_nfree ) * osize );
1524- pool_live_bytes += GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize ;
15251532}
15261533
15271534// the actual sweeping over all allocated pages in a memory pool
@@ -1559,10 +1566,13 @@ static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_
15591566 pg -> nfree = nfree ;
15601567}
15611568
1562- void gc_sweep_wake_all (void )
1569+ void gc_sweep_wake_all (size_t n_workers )
15631570{
1571+ if (n_workers > jl_n_markthreads ) {
1572+ n_workers = jl_n_markthreads ;
1573+ }
15641574 uv_mutex_lock (& gc_threads_lock );
1565- for (int i = gc_first_tid ; i < gc_first_tid + jl_n_gcthreads ; i ++ ) {
1575+ for (int i = gc_first_tid ; i < gc_first_tid + jl_n_markthreads ; i ++ ) {
15661576 jl_ptls_t ptls2 = gc_all_tls_states [i ];
15671577 jl_atomic_fetch_add (& ptls2 -> gc_sweeps_requested , 1 );
15681578 }
@@ -1618,6 +1628,8 @@ void gc_free_pages(void)
16181628 }
16191629}
16201630
1631+ #define GC_MIN_PAGES_PER_WORKER (1 << 6) // gc_sweep_pool divides the marked-page count by this to choose how many sweep workers to request
1632+
16211633// set up the data structures for a sweep over all memory pools
16221634static void gc_sweep_pool (void )
16231635{
@@ -1674,7 +1686,9 @@ static void gc_sweep_pool(void)
16741686 jl_gc_page_stack_t * tmp = (jl_gc_page_stack_t * )alloca (n_threads * sizeof (jl_gc_page_stack_t ));
16751687 memset (tmp , 0 , n_threads * sizeof (jl_gc_page_stack_t ));
16761688 jl_atomic_store (& gc_allocd_scratch , tmp );
1677- gc_sweep_wake_all ();
1689+ size_t n_workers = jl_atomic_load_relaxed (& gc_heap_stats .marked_pages ) / GC_MIN_PAGES_PER_WORKER ;
1690+ jl_atomic_store_relaxed (& gc_heap_stats .marked_pages , 0 );
1691+ gc_sweep_wake_all (n_workers );
16781692 gc_sweep_pool_parallel ();
16791693 gc_sweep_wait_for_all ();
16801694
@@ -1730,7 +1744,6 @@ static void gc_sweep_pool(void)
17301744#else
17311745 gc_free_pages ();
17321746#endif
1733-
17341747 gc_time_pool_end (current_sweep_full );
17351748}
17361749
0 commit comments