Skip to content

Commit a03e7fe

Browse files
committed
parallelize sweeping of object pools
1 parent 5d82d80 commit a03e7fe

File tree

8 files changed

+158
-76
lines changed

8 files changed

+158
-76
lines changed

src/gc-debug.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ static void gc_clear_mark_outer(int bits)
115115
{
116116
for (int i = 0; i < gc_n_threads; i++) {
117117
jl_ptls_t ptls2 = gc_all_tls_states[i];
118-
jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd;
118+
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
119119
while (pg != NULL) {
120120
gc_clear_mark_page(pg, bits);
121121
pg = pg->next;
@@ -1153,7 +1153,7 @@ static void gc_count_pool_pagetable(void)
11531153
{
11541154
for (int i = 0; i < gc_n_threads; i++) {
11551155
jl_ptls_t ptls2 = gc_all_tls_states[i];
1156-
jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd;
1156+
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
11571157
while (pg != NULL) {
11581158
if (gc_alloc_map_is_set(pg->data)) {
11591159
gc_count_pool_page(pg);

src/gc-pages.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,22 +100,22 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
100100
jl_gc_pagemeta_t *meta = NULL;
101101

102102
// try to get page from `pool_lazily_freed`
103-
meta = pop_lf_page_metadata_back(&global_page_pool_lazily_freed);
103+
meta = pop_lf_back(&global_page_pool_lazily_freed);
104104
if (meta != NULL) {
105105
gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED);
106106
// page is already mapped
107107
return meta;
108108
}
109109

110110
// try to get page from `pool_clean`
111-
meta = pop_lf_page_metadata_back(&global_page_pool_clean);
111+
meta = pop_lf_back(&global_page_pool_clean);
112112
if (meta != NULL) {
113113
gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED);
114114
goto exit;
115115
}
116116

117117
// try to get page from `pool_freed`
118-
meta = pop_lf_page_metadata_back(&global_page_pool_freed);
118+
meta = pop_lf_back(&global_page_pool_freed);
119119
if (meta != NULL) {
120120
jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_resident, GC_PAGE_SZ);
121121
gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED);
@@ -124,7 +124,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
124124

125125
uv_mutex_lock(&gc_perm_lock);
126126
// another thread may have allocated a large block while we were waiting...
127-
meta = pop_lf_page_metadata_back(&global_page_pool_clean);
127+
meta = pop_lf_back(&global_page_pool_clean);
128128
if (meta != NULL) {
129129
uv_mutex_unlock(&gc_perm_lock);
130130
gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED);
@@ -138,10 +138,10 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
138138
pg->data = data + GC_PAGE_SZ * i;
139139
gc_alloc_map_maybe_create(pg->data);
140140
if (i == 0) {
141-
gc_alloc_map_set(pg->data, 1);
141+
gc_alloc_map_set(pg->data, GC_PAGE_ALLOCATED);
142142
}
143143
else {
144-
push_lf_page_metadata_back(&global_page_pool_clean, pg);
144+
push_lf_back(&global_page_pool_clean, pg);
145145
}
146146
}
147147
uv_mutex_unlock(&gc_perm_lock);

src/gc.c

Lines changed: 107 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ int jl_n_markthreads;
1818
int jl_n_sweepthreads;
1919
// Number of threads currently running the GC mark-loop
2020
_Atomic(int) gc_n_threads_marking;
21+
// Number of threads sweeping
22+
_Atomic(int) gc_n_threads_sweeping;
23+
// Temporary per-thread page stacks that stand in for `ptls->page_metadata_allocd` during parallel sweeping
24+
_Atomic(jl_gc_page_stack_t *) gc_allocd_scratch;
2125
// `tid` of mutator thread that triggered GC
2226
_Atomic(int) gc_master_tid;
2327
// `tid` of first GC thread
@@ -750,6 +754,7 @@ static int mark_reset_age = 0;
750754
static int64_t scanned_bytes; // young bytes scanned while marking
751755
static int64_t perm_scanned_bytes; // old bytes scanned while marking
752756
int prev_sweep_full = 1;
757+
int current_sweep_full = 0;
753758
int under_pressure = 0;
754759

755760
// Full collection heuristics
@@ -1285,9 +1290,9 @@ STATIC_INLINE jl_taggedvalue_t *gc_reset_page(jl_ptls_t ptls2, const jl_gc_pool_
12851290
return beg;
12861291
}
12871292

1288-
jl_gc_global_page_pool_t global_page_pool_lazily_freed;
1289-
jl_gc_global_page_pool_t global_page_pool_clean;
1290-
jl_gc_global_page_pool_t global_page_pool_freed;
1293+
jl_gc_page_stack_t global_page_pool_lazily_freed;
1294+
jl_gc_page_stack_t global_page_pool_clean;
1295+
jl_gc_page_stack_t global_page_pool_freed;
12911296
pagetable_t alloc_map;
12921297

12931298
// Add a new page to the pool. Discards any pages in `p->newpages` before.
@@ -1296,7 +1301,7 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
12961301
// Do not pass in `ptls` as argument. This slows down the fast path
12971302
// in pool_alloc significantly
12981303
jl_ptls_t ptls = jl_current_task->ptls;
1299-
jl_gc_pagemeta_t *pg = pop_page_metadata_back(&ptls->page_metadata_lazily_freed);
1304+
jl_gc_pagemeta_t *pg = pop_lf_back(&ptls->page_metadata_lazily_freed);
13001305
if (pg != NULL) {
13011306
gc_alloc_map_set(pg->data, GC_PAGE_ALLOCATED);
13021307
}
@@ -1306,7 +1311,7 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
13061311
pg->osize = p->osize;
13071312
pg->thread_n = ptls->tid;
13081313
set_page_metadata(pg);
1309-
push_page_metadata_back(&ptls->page_metadata_allocd, pg);
1314+
push_lf_back(&ptls->page_metadata_allocd, pg);
13101315
jl_taggedvalue_t *fl = gc_reset_page(ptls, p, pg);
13111316
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, GC_PAGE_SZ);
13121317
p->newpages = fl;
@@ -1408,8 +1413,8 @@ int jl_gc_classify_pools(size_t sz, int *osize)
14081413
int64_t lazy_freed_pages = 0;
14091414

14101415
// Sweeps one pool page, then pushes its metadata onto `allocd`, `lazily_freed`, or a global page pool.
1411-
static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allocd,
1412-
jl_gc_pagemeta_t **lazily_freed, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT
1416+
static void gc_sweep_page(jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_page_stack_t *lazily_freed,
1417+
jl_gc_pagemeta_t *pg, int osize) JL_NOTSAFEPOINT
14131418
{
14141419
char *data = pg->data;
14151420
jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
@@ -1433,7 +1438,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
14331438
// the eager one uses less memory.
14341439
// FIXME - need to do accounting on a per-thread basis
14351440
// on quick sweeps, keep a few pages empty but allocated for performance
1436-
if (!sweep_full && lazy_freed_pages <= default_collect_interval / GC_PAGE_SZ) {
1441+
if (!current_sweep_full && lazy_freed_pages <= default_collect_interval / GC_PAGE_SZ) {
14371442
lazy_freed_pages++;
14381443
freed_lazily = 1;
14391444
}
@@ -1443,15 +1448,9 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
14431448
}
14441449
// For quick sweep, we might be able to skip the page if the page doesn't
14451450
// have any young live cell before marking.
1446-
if (!sweep_full && !pg->has_young) {
1451+
if (!current_sweep_full && !pg->has_young) {
14471452
assert(!prev_sweep_full || pg->prev_nold >= pg->nold);
14481453
if (!prev_sweep_full || pg->prev_nold == pg->nold) {
1449-
// the position of the freelist begin/end in this page
1450-
// is stored in its metadata
1451-
if (pg->fl_begin_offset != (uint16_t)-1) {
1452-
*pfl = page_pfl_beg(pg);
1453-
pfl = (jl_taggedvalue_t**)page_pfl_end(pg);
1454-
}
14551454
freedall = 0;
14561455
nfree = pg->nfree;
14571456
goto done;
@@ -1464,6 +1463,8 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
14641463
int has_young = 0;
14651464
int16_t prev_nold = 0;
14661465
int pg_nfree = 0;
1466+
jl_taggedvalue_t *fl = NULL;
1467+
jl_taggedvalue_t **pfl = &fl;
14671468
jl_taggedvalue_t **pfl_begin = NULL;
14681469
while ((char*)v <= lim) {
14691470
int bits = v->bits.gc;
@@ -1475,7 +1476,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
14751476
pg_nfree++;
14761477
}
14771478
else { // marked young or old
1478-
if (sweep_full || bits == GC_MARKED) { // old enough
1479+
if (current_sweep_full || bits == GC_MARKED) { // old enough
14791480
bits = v->bits.gc = GC_OLD; // promote
14801481
}
14811482
prev_nold++;
@@ -1497,7 +1498,7 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
14971498
}
14981499

14991500
pg->nfree = pg_nfree;
1500-
if (sweep_full) {
1501+
if (current_sweep_full) {
15011502
pg->nold = 0;
15021503
pg->prev_nold = prev_nold;
15031504
}
@@ -1506,45 +1507,44 @@ static jl_taggedvalue_t **gc_sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t **allo
15061507

15071508
done:
15081509
if (re_use_page) {
1509-
push_page_metadata_back(allocd, pg);
1510+
push_lf_back(allocd, pg);
15101511
}
15111512
else if (freed_lazily) {
15121513
gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED);
1513-
push_page_metadata_back(lazily_freed, pg);
1514+
push_lf_back(lazily_freed, pg);
15141515
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ);
15151516
}
15161517
else {
15171518
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ);
15181519
#ifdef _P64 // only enable concurrent sweeping on 64bit
15191520
if (jl_n_sweepthreads == 0) {
15201521
jl_gc_free_page(pg);
1521-
push_lf_page_metadata_back(&global_page_pool_freed, pg);
1522+
push_lf_back(&global_page_pool_freed, pg);
15221523
}
15231524
else {
15241525
gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED);
1525-
push_lf_page_metadata_back(&global_page_pool_lazily_freed, pg);
1526+
push_lf_back(&global_page_pool_lazily_freed, pg);
15261527
}
15271528
#else
15281529
jl_gc_free_page(pg);
1529-
push_lf_page_metadata_back(&global_page_pool_freed, pg);
1530+
push_lf_back(&global_page_pool_freed, pg);
15301531
#endif
15311532
}
15321533
gc_time_count_page(freedall, pg_skpd);
1533-
gc_num.freed += (nfree - old_nfree) * osize;
1534+
jl_atomic_fetch_add((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize);
15341535
pool_live_bytes += GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize;
1535-
return pfl;
15361536
}
15371537

15381538
// the actual sweeping over all allocated pages in a memory pool
1539-
STATIC_INLINE void gc_sweep_pool_page(jl_taggedvalue_t ***pfl, jl_gc_pagemeta_t **allocd,
1540-
jl_gc_pagemeta_t **lazily_freed, jl_gc_pagemeta_t *pg, int sweep_full) JL_NOTSAFEPOINT
1539+
STATIC_INLINE void gc_sweep_pool_page(jl_gc_page_stack_t *allocd, jl_gc_page_stack_t *lazily_freed,
1540+
jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
15411541
{
15421542
int p_n = pg->pool_n;
15431543
int t_n = pg->thread_n;
15441544
jl_ptls_t ptls2 = gc_all_tls_states[t_n];
15451545
jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n];
15461546
int osize = pg->osize;
1547-
pfl[t_n * JL_GC_N_POOLS + p_n] = gc_sweep_page(p, allocd, lazily_freed, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize);
1547+
gc_sweep_page(p, allocd, lazily_freed, pg, osize);
15481548
}
15491549

15501550
// sweep over all memory that is being used and not in a pool
@@ -1570,8 +1570,55 @@ static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_
15701570
pg->nfree = nfree;
15711571
}
15721572

1573+
// Request a sweep from every GC thread and signal them.
// While holding `gc_threads_lock`, increments `gc_sweeps_requested` on each
// thread state in [gc_first_tid, gc_first_tid + jl_n_gcthreads) and then
// broadcasts on `gc_threads_cond`.
// NOTE(review): presumably the GC threads block on `gc_threads_cond` and check
// `gc_sweeps_requested` when woken -- confirm against the GC thread loop.
void gc_sweep_wake_all(void)
1574+
{
1575+
uv_mutex_lock(&gc_threads_lock);
1576+
for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) {
1577+
jl_ptls_t ptls2 = gc_all_tls_states[i];
1578+
// one more pending sweep request for this GC thread
jl_atomic_fetch_add(&ptls2->gc_sweeps_requested, 1);
1579+
}
1580+
// requests are published before the broadcast, both under the lock
uv_cond_broadcast(&gc_threads_cond);
1581+
uv_mutex_unlock(&gc_threads_lock);
1582+
}
1583+
1584+
// Sweep pool pages until no thread has any page left in `page_metadata_allocd`.
// Each call registers itself in `gc_n_threads_sweeping` for its whole duration.
// If `gc_allocd_scratch` is NULL the call does no sweeping and only
// increments/decrements the counter.
// Pages are popped one at a time, round-robin over all thread states; every
// swept page is handed to `gc_sweep_pool_page` together with the scratch stack
// slot of the thread it was popped from and that thread's lazily-freed stack.
void gc_sweep_pool_parallel(void)
1585+
{
1586+
// register as a sweeper *before* reading the scratch pointer
jl_atomic_fetch_add(&gc_n_threads_sweeping, 1);
1587+
jl_gc_page_stack_t *allocd_scratch = jl_atomic_load(&gc_allocd_scratch);
1588+
if (allocd_scratch != NULL) {
1589+
while (1) {
1590+
// set when at least one page was swept during this pass over the threads
int found_pg = 0;
1591+
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
1592+
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
1593+
if (ptls2 == NULL) {
1594+
continue;
1595+
}
1596+
// scratch slot that collects this thread's pages that stay allocated
jl_gc_page_stack_t *allocd = &allocd_scratch[t_i];
1597+
jl_gc_pagemeta_t *pg = pop_lf_back(&ptls2->page_metadata_allocd);
1598+
if (pg == NULL) {
1599+
continue;
1600+
}
1601+
gc_sweep_pool_page(allocd, &ptls2->page_metadata_lazily_freed, pg);
1602+
found_pg = 1;
1603+
}
1604+
// a full pass that popped nothing means every stack was seen empty
if (!found_pg) {
1605+
break;
1606+
}
1607+
}
1608+
}
1609+
// deregister; `gc_sweep_wait_for_all` spins until this counter reaches 0
jl_atomic_fetch_add(&gc_n_threads_sweeping, -1);
1610+
}
1611+
1612+
// Stop handing out sweep work and wait until no thread is sweeping.
// Clears `gc_allocd_scratch` so that any thread entering
// `gc_sweep_pool_parallel` from now on finds no scratch array and does no
// sweeping, then spins until `gc_n_threads_sweeping` drops to 0.
void gc_sweep_wait_for_all(void)
1613+
{
1614+
jl_atomic_store(&gc_allocd_scratch, NULL);
1615+
// Sequentially-consistent (hence acquiring) load: observing 0 must order the
// sweepers' writes to page metadata and the scratch stacks before the
// caller's subsequent plain reads of them. A relaxed load gives no such
// guarantee on weakly-ordered hardware.
while (jl_atomic_load(&gc_n_threads_sweeping) != 0) {
1616+
jl_cpu_pause();
1617+
}
1618+
}
1619+
15731620
// setup the data-structures for a sweep over all memory pools
1574-
static void gc_sweep_pool(int sweep_full)
1621+
static void gc_sweep_pool(void)
15751622
{
15761623
gc_time_pool_start();
15771624
lazy_freed_pages = 0;
@@ -1614,7 +1661,7 @@ static void gc_sweep_pool(int sweep_full)
16141661
pg->has_young = 1;
16151662
}
16161663
}
1617-
jl_gc_pagemeta_t *pg = ptls2->page_metadata_lazily_freed;
1664+
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_lazily_freed.bottom);
16181665
while (pg != NULL) {
16191666
jl_gc_pagemeta_t *pg2 = pg->next;
16201667
lazy_freed_pages++;
@@ -1623,24 +1670,44 @@ static void gc_sweep_pool(int sweep_full)
16231670
}
16241671

16251672
// the actual sweeping
1673+
jl_gc_page_stack_t *tmp = (jl_gc_page_stack_t *)alloca(n_threads * sizeof(jl_gc_page_stack_t));
1674+
memset(tmp, 0, n_threads * sizeof(jl_gc_page_stack_t));
1675+
jl_atomic_store(&gc_allocd_scratch, tmp);
1676+
gc_sweep_wake_all();
1677+
gc_sweep_pool_parallel();
1678+
gc_sweep_wait_for_all();
1679+
16261680
for (int t_i = 0; t_i < n_threads; t_i++) {
16271681
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
16281682
if (ptls2 != NULL) {
1629-
jl_gc_pagemeta_t *allocd = NULL;
1630-
jl_gc_pagemeta_t *pg = ptls2->page_metadata_allocd;
1631-
while (pg != NULL) {
1632-
jl_gc_pagemeta_t *pg2 = pg->next;
1633-
gc_sweep_pool_page(pfl, &allocd, &ptls2->page_metadata_lazily_freed, pg, sweep_full);
1634-
pg = pg2;
1635-
}
1636-
ptls2->page_metadata_allocd = allocd;
1683+
ptls2->page_metadata_allocd = tmp[t_i];
16371684
for (int i = 0; i < JL_GC_N_POOLS; i++) {
16381685
jl_gc_pool_t *p = &ptls2->heap.norm_pools[i];
16391686
p->newpages = NULL;
16401687
}
16411688
}
16421689
}
16431690

1691+
// merge free lists
1692+
for (int t_i = 0; t_i < n_threads; t_i++) {
1693+
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
1694+
if (ptls2 == NULL) {
1695+
continue;
1696+
}
1697+
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
1698+
while (pg != NULL) {
1699+
jl_gc_pagemeta_t *pg2 = pg->next;
1700+
if (pg->fl_begin_offset != UINT16_MAX) {
1701+
char *cur_pg = pg->data;
1702+
jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset);
1703+
jl_taggedvalue_t *fl_end = (jl_taggedvalue_t*)(cur_pg + pg->fl_end_offset);
1704+
*pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = fl_beg;
1705+
pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = &fl_end->next;
1706+
}
1707+
pg = pg2;
1708+
}
1709+
}
1710+
16441711
// null out terminal pointers of free lists
16451712
for (int t_i = 0; t_i < n_threads; t_i++) {
16461713
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
@@ -1658,7 +1725,7 @@ static void gc_sweep_pool(int sweep_full)
16581725
}
16591726
#endif
16601727

1661-
gc_time_pool_end(sweep_full);
1728+
gc_time_pool_end(current_sweep_full);
16621729
}
16631730

16641731
static void gc_sweep_perm_alloc(void)
@@ -3289,13 +3356,14 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
32893356
#ifdef USE_TRACY
32903357
TracyCZoneColor(full_timing_block.tracy_ctx, 0xFFA500);
32913358
#endif
3359+
current_sweep_full = sweep_full;
32923360
sweep_weak_refs();
32933361
sweep_stack_pools();
32943362
gc_sweep_foreign_objs();
32953363
gc_sweep_other(ptls, sweep_full);
32963364
gc_scrub();
32973365
gc_verify_tags();
3298-
gc_sweep_pool(sweep_full);
3366+
gc_sweep_pool();
32993367
if (sweep_full)
33003368
gc_sweep_perm_alloc();
33013369
}

0 commit comments

Comments
 (0)