@@ -18,6 +18,10 @@ int jl_n_markthreads;
1818int jl_n_sweepthreads ;
1919// Number of threads currently running the GC mark-loop
2020_Atomic(int ) gc_n_threads_marking ;
21+ // Number of threads sweeping
22+ _Atomic(int ) gc_n_threads_sweeping ;
23+ // Temporary for the `ptls->page_metadata_allocd` used during parallel sweeping
24+ _Atomic(jl_gc_page_stack_t * ) gc_allocd_scratch ;
2125// `tid` of mutator thread that triggered GC
2226_Atomic(int ) gc_master_tid ;
2327// `tid` of first GC thread
@@ -750,6 +754,7 @@ static int mark_reset_age = 0;
750754static int64_t scanned_bytes ; // young bytes scanned while marking
751755static int64_t perm_scanned_bytes ; // old bytes scanned while marking
752756int prev_sweep_full = 1 ;
757+ int current_sweep_full = 0 ;
753758int under_pressure = 0 ;
754759
755760// Full collection heuristics
@@ -1285,9 +1290,9 @@ STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_
12851290 return beg ;
12861291}
12871292
1288- jl_gc_global_page_pool_t global_page_pool_lazily_freed ;
1289- jl_gc_global_page_pool_t global_page_pool_clean ;
1290- jl_gc_global_page_pool_t global_page_pool_freed ;
1293+ jl_gc_page_stack_t global_page_pool_lazily_freed ;
1294+ jl_gc_page_stack_t global_page_pool_clean ;
1295+ jl_gc_page_stack_t global_page_pool_freed ;
12911296pagetable_t alloc_map ;
12921297
12931298// Add a new page to the pool. Discards any pages in `p->newpages` before.
@@ -1296,7 +1301,7 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
12961301 // Do not pass in `ptls` as argument. This slows down the fast path
12971302 // in pool_alloc significantly
12981303 jl_ptls_t ptls = jl_current_task -> ptls ;
1299- jl_gc_pagemeta_t * pg = pop_page_metadata_back (& ptls -> page_metadata_lazily_freed );
1304+ jl_gc_pagemeta_t * pg = pop_lf_back (& ptls -> page_metadata_lazily_freed );
13001305 if (pg != NULL ) {
13011306 gc_alloc_map_set (pg -> data , GC_PAGE_ALLOCATED );
13021307 }
@@ -1306,7 +1311,7 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
13061311 pg -> osize = p -> osize ;
13071312 pg -> thread_n = ptls -> tid ;
13081313 set_page_metadata (pg );
1309- push_page_metadata_back (& ptls -> page_metadata_allocd , pg );
1314+ push_lf_back (& ptls -> page_metadata_allocd , pg );
13101315 jl_taggedvalue_t * fl = gc_reset_page (ptls , p , pg );
13111316 jl_atomic_fetch_add_relaxed (& gc_heap_stats .heap_size , GC_PAGE_SZ );
13121317 p -> newpages = fl ;
@@ -1408,8 +1413,8 @@ int jl_gc_classify_pools(size_t sz, int *osize)
14081413int64_t lazy_freed_pages = 0 ;
14091414
14101415// Returns pointer to terminal pointer of list rooted at *pfl.
1411- static jl_taggedvalue_t * * gc_sweep_page (jl_gc_pool_t * p , jl_gc_pagemeta_t * * allocd ,
1412- jl_gc_pagemeta_t * * lazily_freed , jl_gc_pagemeta_t * pg , jl_taggedvalue_t * * pfl , int sweep_full , int osize ) JL_NOTSAFEPOINT
1416+ static void gc_sweep_page (jl_gc_pool_t * p , jl_gc_page_stack_t * allocd , jl_gc_page_stack_t * lazily_freed ,
1417+ jl_gc_pagemeta_t * pg , int osize ) JL_NOTSAFEPOINT
14131418{
14141419 char * data = pg -> data ;
14151420 jl_taggedvalue_t * v = (jl_taggedvalue_t * )(data + GC_PAGE_OFFSET );
@@ -1433,7 +1438,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
14331438 // the eager one uses less memory.
14341439 // FIXME - need to do accounting on a per-thread basis
14351440 // on quick sweeps, keep a few pages empty but allocated for performance
1436- if (!sweep_full && lazy_freed_pages <= default_collect_interval / GC_PAGE_SZ ) {
1441+ if (!current_sweep_full && lazy_freed_pages <= default_collect_interval / GC_PAGE_SZ ) {
14371442 lazy_freed_pages ++ ;
14381443 freed_lazily = 1 ;
14391444 }
@@ -1443,15 +1448,9 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
14431448 }
14441449 // For quick sweep, we might be able to skip the page if the page doesn't
14451450 // have any young live cell before marking.
1446- if (!sweep_full && !pg -> has_young ) {
1451+ if (!current_sweep_full && !pg -> has_young ) {
14471452 assert (!prev_sweep_full || pg -> prev_nold >= pg -> nold );
14481453 if (!prev_sweep_full || pg -> prev_nold == pg -> nold ) {
1449- // the position of the freelist begin/end in this page
1450- // is stored in its metadata
1451- if (pg -> fl_begin_offset != (uint16_t )-1 ) {
1452- * pfl = page_pfl_beg (pg );
1453- pfl = (jl_taggedvalue_t * * )page_pfl_end (pg );
1454- }
14551454 freedall = 0 ;
14561455 nfree = pg -> nfree ;
14571456 goto done ;
@@ -1464,6 +1463,8 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
14641463 int has_young = 0 ;
14651464 int16_t prev_nold = 0 ;
14661465 int pg_nfree = 0 ;
1466+ jl_taggedvalue_t * fl = NULL ;
1467+ jl_taggedvalue_t * * pfl = & fl ;
14671468 jl_taggedvalue_t * * pfl_begin = NULL ;
14681469 while ((char * )v <= lim ) {
14691470 int bits = v -> bits .gc ;
@@ -1475,7 +1476,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
14751476 pg_nfree ++ ;
14761477 }
14771478 else { // marked young or old
1478- if (sweep_full || bits == GC_MARKED ) { // old enough
1479+ if (current_sweep_full || bits == GC_MARKED ) { // old enough
14791480 bits = v -> bits .gc = GC_OLD ; // promote
14801481 }
14811482 prev_nold ++ ;
@@ -1497,7 +1498,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
14971498 }
14981499
14991500 pg -> nfree = pg_nfree ;
1500- if (sweep_full ) {
1501+ if (current_sweep_full ) {
15011502 pg -> nold = 0 ;
15021503 pg -> prev_nold = prev_nold ;
15031504 }
@@ -1506,45 +1507,44 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
15061507
15071508done :
15081509 if (re_use_page ) {
1509- push_page_metadata_back (allocd , pg );
1510+ push_lf_back (allocd , pg );
15101511 }
15111512 else if (freed_lazily ) {
15121513 gc_alloc_map_set (pg -> data , GC_PAGE_LAZILY_FREED );
1513- push_page_metadata_back (lazily_freed , pg );
1514+ push_lf_back (lazily_freed , pg );
15141515 jl_atomic_fetch_add_relaxed (& gc_heap_stats .heap_size , - GC_PAGE_SZ );
15151516 }
15161517 else {
15171518 jl_atomic_fetch_add_relaxed (& gc_heap_stats .heap_size , - GC_PAGE_SZ );
15181519 #ifdef _P64 // only enable concurrent sweeping on 64bit
15191520 if (jl_n_sweepthreads == 0 ) {
15201521 jl_gc_free_page (pg );
1521- push_lf_page_metadata_back (& global_page_pool_freed , pg );
1522+ push_lf_back (& global_page_pool_freed , pg );
15221523 }
15231524 else {
15241525 gc_alloc_map_set (pg -> data , GC_PAGE_LAZILY_FREED );
1525- push_lf_page_metadata_back (& global_page_pool_lazily_freed , pg );
1526+ push_lf_back (& global_page_pool_lazily_freed , pg );
15261527 }
15271528 #else
15281529 jl_gc_free_page (pg );
1529- push_lf_page_metadata_back (& global_page_pool_freed , pg );
1530+ push_lf_back (& global_page_pool_freed , pg );
15301531 #endif
15311532 }
15321533 gc_time_count_page (freedall , pg_skpd );
1533- gc_num .freed += (nfree - old_nfree ) * osize ;
1534+ jl_atomic_fetch_add (( _Atomic ( int64_t ) * ) & gc_num .freed , (nfree - old_nfree ) * osize ) ;
15341535 pool_live_bytes += GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize ;
1535- return pfl ;
15361536}
15371537
15381538// the actual sweeping over all allocated pages in a memory pool
1539- STATIC_INLINE void gc_sweep_pool_page (jl_taggedvalue_t * * * pfl , jl_gc_pagemeta_t * * allocd ,
1540- jl_gc_pagemeta_t * * lazily_freed , jl_gc_pagemeta_t * pg , int sweep_full ) JL_NOTSAFEPOINT
1539+ STATIC_INLINE void gc_sweep_pool_page (jl_gc_page_stack_t * allocd , jl_gc_page_stack_t * lazily_freed ,
1540+ jl_gc_pagemeta_t * pg ) JL_NOTSAFEPOINT
15411541{
15421542 int p_n = pg -> pool_n ;
15431543 int t_n = pg -> thread_n ;
15441544 jl_ptls_t ptls2 = gc_all_tls_states [t_n ];
15451545 jl_gc_pool_t * p = & ptls2 -> heap .norm_pools [p_n ];
15461546 int osize = pg -> osize ;
1547- pfl [ t_n * JL_GC_N_POOLS + p_n ] = gc_sweep_page (p , allocd , lazily_freed , pg , pfl [ t_n * JL_GC_N_POOLS + p_n ], sweep_full , osize );
1547+ gc_sweep_page (p , allocd , lazily_freed , pg , osize );
15481548}
15491549
15501550// sweep over all memory that is being used and not in a pool
@@ -1570,8 +1570,55 @@ static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_
15701570 pg -> nfree = nfree ;
15711571}
15721572
// Wake every dedicated GC thread so it can join the parallel sweep of the
// pool pages. For each GC thread (tids in [gc_first_tid, gc_first_tid +
// jl_n_gcthreads), per the loop bounds) we bump its `gc_sweeps_requested`
// counter under `gc_threads_lock`, then broadcast on `gc_threads_cond`.
// NOTE(review): the counter is incremented with a (seq_cst) fetch_add while
// the lock is held, so a thread that checks its request count after waking
// cannot miss this request — presumably the GC threads wait on
// `gc_threads_cond` and re-check `gc_sweeps_requested`; confirm against the
// GC-thread main loop (not visible in this hunk).
1573+ void gc_sweep_wake_all (void )
1574+ {
1575+ uv_mutex_lock (& gc_threads_lock );
1576+ for (int i = gc_first_tid ; i < gc_first_tid + jl_n_gcthreads ; i ++ ) {
1577+ jl_ptls_t ptls2 = gc_all_tls_states [i ];
1578+ jl_atomic_fetch_add (& ptls2 -> gc_sweeps_requested , 1 );
1579+ }
// Broadcast while still holding the mutex: no waiter can be between its
// predicate check and its wait when the signal fires.
1580+ uv_cond_broadcast (& gc_threads_cond );
1581+ uv_mutex_unlock (& gc_threads_lock );
1582+ }
1583+
// Body of the parallel pool sweep, run by both the master thread (directly
// from gc_sweep_pool) and by woken GC threads. Repeatedly pops one page at a
// time off each mutator thread's `page_metadata_allocd` stack and sweeps it,
// pushing surviving pages onto the matching per-thread scratch stack in
// `gc_allocd_scratch`; terminates once a full pass over all threads finds no
// page left to claim.
//
// Ordering matters: `gc_n_threads_sweeping` is incremented (seq_cst) BEFORE
// `gc_allocd_scratch` is loaded. gc_sweep_wait_for_all does the mirror-image
// store of NULL before spinning on the counter, so a sweeper is either seen
// by the waiter (counter already nonzero) or observes the NULL scratch
// pointer and does no work — it then only balances the counter below.
1584+ void gc_sweep_pool_parallel (void )
1585+ {
1586+ jl_atomic_fetch_add (& gc_n_threads_sweeping , 1 );
1587+ jl_gc_page_stack_t * allocd_scratch = jl_atomic_load (& gc_allocd_scratch );
// NULL scratch means the sweep has already been torn down (late wakeup).
1588+ if (allocd_scratch != NULL ) {
1589+ while (1 ) {
1590+ int found_pg = 0 ;
// Round-robin over every thread's stack rather than draining one
// stack at a time, so concurrent sweepers spread across stacks.
1591+ for (int t_i = 0 ; t_i < gc_n_threads ; t_i ++ ) {
1592+ jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1593+ if (ptls2 == NULL ) {
1594+ continue ;
1595+ }
1596+ jl_gc_page_stack_t * allocd = & allocd_scratch [t_i ];
// pop_lf_back is the lock-free claim: at most one sweeper
// gets each page.
1597+ jl_gc_pagemeta_t * pg = pop_lf_back (& ptls2 -> page_metadata_allocd );
1598+ if (pg == NULL ) {
1599+ continue ;
1600+ }
1601+ gc_sweep_pool_page (allocd , & ptls2 -> page_metadata_lazily_freed , pg );
1602+ found_pg = 1 ;
1603+ }
// Only quit after a complete pass that claimed nothing.
1604+ if (!found_pg ) {
1605+ break ;
1606+ }
1607+ }
1608+ }
// Always decrement, even on the NULL-scratch path, to keep the counter
// balanced for gc_sweep_wait_for_all.
1609+ jl_atomic_fetch_add (& gc_n_threads_sweeping , -1 );
1610+ }
1611+
// Master-thread barrier at the end of the parallel sweep: first publish NULL
// to `gc_allocd_scratch` so any GC thread that wakes up from now on sees no
// work (see the matching increment-then-load in gc_sweep_pool_parallel),
// then spin until every in-flight sweeper has decremented
// `gc_n_threads_sweeping` back to zero.
// NOTE(review): the scratch array is caller-owned (alloca'd in
// gc_sweep_pool), so it must not be read by sweepers after this returns —
// the NULL store plus the counter wait is what guarantees that.
1612+ void gc_sweep_wait_for_all (void )
1613+ {
1614+ jl_atomic_store (& gc_allocd_scratch , NULL );
// Relaxed load is sufficient for the spin; jl_cpu_pause is a
// busy-wait hint to the CPU.
1615+ while (jl_atomic_load_relaxed (& gc_n_threads_sweeping ) != 0 ) {
1616+ jl_cpu_pause ();
1617+ }
1618+ }
1619+
15731620// setup the data-structures for a sweep over all memory pools
1574- static void gc_sweep_pool (int sweep_full )
1621+ static void gc_sweep_pool (void )
15751622{
15761623 gc_time_pool_start ();
15771624 lazy_freed_pages = 0 ;
@@ -1614,7 +1661,7 @@ static void gc_sweep_pool(int sweep_full)
16141661 pg -> has_young = 1 ;
16151662 }
16161663 }
1617- jl_gc_pagemeta_t * pg = ptls2 -> page_metadata_lazily_freed ;
1664+ jl_gc_pagemeta_t * pg = jl_atomic_load_relaxed ( & ptls2 -> page_metadata_lazily_freed . bottom ) ;
16181665 while (pg != NULL ) {
16191666 jl_gc_pagemeta_t * pg2 = pg -> next ;
16201667 lazy_freed_pages ++ ;
@@ -1623,24 +1670,44 @@ static void gc_sweep_pool(int sweep_full)
16231670 }
16241671
16251672 // the actual sweeping
1673+ jl_gc_page_stack_t * tmp = (jl_gc_page_stack_t * )alloca (n_threads * sizeof (jl_gc_page_stack_t ));
1674+ memset (tmp , 0 , n_threads * sizeof (jl_gc_page_stack_t ));
1675+ jl_atomic_store (& gc_allocd_scratch , tmp );
1676+ gc_sweep_wake_all ();
1677+ gc_sweep_pool_parallel ();
1678+ gc_sweep_wait_for_all ();
1679+
16261680 for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
16271681 jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
16281682 if (ptls2 != NULL ) {
1629- jl_gc_pagemeta_t * allocd = NULL ;
1630- jl_gc_pagemeta_t * pg = ptls2 -> page_metadata_allocd ;
1631- while (pg != NULL ) {
1632- jl_gc_pagemeta_t * pg2 = pg -> next ;
1633- gc_sweep_pool_page (pfl , & allocd , & ptls2 -> page_metadata_lazily_freed , pg , sweep_full );
1634- pg = pg2 ;
1635- }
1636- ptls2 -> page_metadata_allocd = allocd ;
1683+ ptls2 -> page_metadata_allocd = tmp [t_i ];
16371684 for (int i = 0 ; i < JL_GC_N_POOLS ; i ++ ) {
16381685 jl_gc_pool_t * p = & ptls2 -> heap .norm_pools [i ];
16391686 p -> newpages = NULL ;
16401687 }
16411688 }
16421689 }
16431690
1691+ // merge free lists
1692+ for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
1693+ jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1694+ if (ptls2 == NULL ) {
1695+ continue ;
1696+ }
1697+ jl_gc_pagemeta_t * pg = jl_atomic_load_relaxed (& ptls2 -> page_metadata_allocd .bottom );
1698+ while (pg != NULL ) {
1699+ jl_gc_pagemeta_t * pg2 = pg -> next ;
1700+ if (pg -> fl_begin_offset != UINT16_MAX ) {
1701+ char * cur_pg = pg -> data ;
1702+ jl_taggedvalue_t * fl_beg = (jl_taggedvalue_t * )(cur_pg + pg -> fl_begin_offset );
1703+ jl_taggedvalue_t * fl_end = (jl_taggedvalue_t * )(cur_pg + pg -> fl_end_offset );
1704+ * pfl [t_i * JL_GC_N_POOLS + pg -> pool_n ] = fl_beg ;
1705+ pfl [t_i * JL_GC_N_POOLS + pg -> pool_n ] = & fl_end -> next ;
1706+ }
1707+ pg = pg2 ;
1708+ }
1709+ }
1710+
16441711 // null out terminal pointers of free lists
16451712 for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
16461713 jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
@@ -1658,7 +1725,7 @@ static void gc_sweep_pool(int sweep_full)
16581725 }
16591726#endif
16601727
1661- gc_time_pool_end (sweep_full );
1728+ gc_time_pool_end (current_sweep_full );
16621729}
16631730
16641731static void gc_sweep_perm_alloc (void )
@@ -3289,13 +3356,14 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
32893356#ifdef USE_TRACY
32903357 TracyCZoneColor (full_timing_block .tracy_ctx , 0xFFA500 );
32913358#endif
3359+ current_sweep_full = sweep_full ;
32923360 sweep_weak_refs ();
32933361 sweep_stack_pools ();
32943362 gc_sweep_foreign_objs ();
32953363 gc_sweep_other (ptls , sweep_full );
32963364 gc_scrub ();
32973365 gc_verify_tags ();
3298- gc_sweep_pool (sweep_full );
3366+ gc_sweep_pool ();
32993367 if (sweep_full )
33003368 gc_sweep_perm_alloc ();
33013369 }
0 commit comments