Skip to content

Commit 7843330

Browse files
authored
Instrument GC to break down the time spent in each step of sweeping (#176)
1 parent 0e5b029 commit 7843330

File tree

3 files changed

+60
-43
lines changed

3 files changed

+60
-43
lines changed

base/timing.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ struct GC_Num
2323
sweep_time ::Int64
2424
mark_time ::Int64
2525
total_sweep_time ::Int64
26+
total_sweep_page_walk_time ::Int64
27+
total_sweep_madvise_time ::Int64
28+
total_sweep_free_mallocd_memory_time ::Int64
2629
total_mark_time ::Int64
2730
last_full_sweep ::Int64
2831
last_incremental_sweep ::Int64

src/gc.c

Lines changed: 54 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1518,8 +1518,11 @@ STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_pa
15181518
// sweep over all memory that is being used and not in a pool
15191519
static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
15201520
{
1521+
uint64_t t_free_mallocd_memory_start = jl_hrtime();
15211522
sweep_malloced_arrays();
15221523
sweep_big(ptls);
1524+
uint64_t t_free_mallocd_memory_end = jl_hrtime();
1525+
gc_num.total_sweep_free_mallocd_memory_time += t_free_mallocd_memory_end - t_free_mallocd_memory_start;
15231526
}
15241527

15251528
static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT
@@ -1776,66 +1779,74 @@ static void gc_sweep_pool(void)
17761779
}
17771780
}
17781781

1779-
// the actual sweeping
1780-
jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) calloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t));
1781-
jl_ptls_t ptls = jl_current_task->ptls;
1782-
gc_sweep_wake_all(ptls, new_gc_allocd_scratch);
1783-
gc_sweep_pool_parallel(ptls);
1784-
gc_sweep_wait_for_all();
1785-
1786-
// reset half-pages pointers
1787-
for (int t_i = 0; t_i < n_threads; t_i++) {
1788-
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
1789-
if (ptls2 != NULL) {
1790-
ptls2->gc_tls.page_metadata_allocd = new_gc_allocd_scratch[t_i].stack;
1791-
for (int i = 0; i < JL_GC_N_POOLS; i++) {
1792-
jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
1793-
p->newpages = NULL;
1782+
uint64_t t_page_walk_start = jl_hrtime();
1783+
{
1784+
// the actual sweeping
1785+
jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) calloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t));
1786+
jl_ptls_t ptls = jl_current_task->ptls;
1787+
gc_sweep_wake_all(ptls, new_gc_allocd_scratch);
1788+
gc_sweep_pool_parallel(ptls);
1789+
gc_sweep_wait_for_all();
1790+
1791+
// reset half-pages pointers
1792+
for (int t_i = 0; t_i < n_threads; t_i++) {
1793+
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
1794+
if (ptls2 != NULL) {
1795+
ptls2->gc_tls.page_metadata_allocd = new_gc_allocd_scratch[t_i].stack;
1796+
for (int i = 0; i < JL_GC_N_POOLS; i++) {
1797+
jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
1798+
p->newpages = NULL;
1799+
}
17941800
}
17951801
}
1796-
}
17971802

1798-
// merge free lists
1799-
for (int t_i = 0; t_i < n_threads; t_i++) {
1800-
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
1801-
if (ptls2 == NULL) {
1802-
continue;
1803-
}
1804-
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
1805-
while (pg != NULL) {
1806-
jl_gc_pagemeta_t *pg2 = pg->next;
1807-
if (pg->fl_begin_offset != UINT16_MAX) {
1808-
char *cur_pg = pg->data;
1809-
jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset);
1810-
jl_taggedvalue_t *fl_end = (jl_taggedvalue_t*)(cur_pg + pg->fl_end_offset);
1811-
*pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = fl_beg;
1812-
pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = &fl_end->next;
1803+
// merge free lists
1804+
for (int t_i = 0; t_i < n_threads; t_i++) {
1805+
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
1806+
if (ptls2 == NULL) {
1807+
continue;
1808+
}
1809+
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
1810+
while (pg != NULL) {
1811+
jl_gc_pagemeta_t *pg2 = pg->next;
1812+
if (pg->fl_begin_offset != UINT16_MAX) {
1813+
char *cur_pg = pg->data;
1814+
jl_taggedvalue_t *fl_beg = (jl_taggedvalue_t*)(cur_pg + pg->fl_begin_offset);
1815+
jl_taggedvalue_t *fl_end = (jl_taggedvalue_t*)(cur_pg + pg->fl_end_offset);
1816+
*pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = fl_beg;
1817+
pfl[t_i * JL_GC_N_POOLS + pg->pool_n] = &fl_end->next;
1818+
}
1819+
pg = pg2;
18131820
}
1814-
pg = pg2;
18151821
}
1816-
}
18171822

1818-
// null out terminal pointers of free lists
1819-
for (int t_i = 0; t_i < n_threads; t_i++) {
1820-
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
1821-
if (ptls2 != NULL) {
1822-
for (int i = 0; i < JL_GC_N_POOLS; i++) {
1823-
*pfl[t_i * JL_GC_N_POOLS + i] = NULL;
1823+
// null out terminal pointers of free lists
1824+
for (int t_i = 0; t_i < n_threads; t_i++) {
1825+
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
1826+
if (ptls2 != NULL) {
1827+
for (int i = 0; i < JL_GC_N_POOLS; i++) {
1828+
*pfl[t_i * JL_GC_N_POOLS + i] = NULL;
1829+
}
18241830
}
18251831
}
1826-
}
18271832

1828-
// cleanup
1829-
free(pfl);
1830-
free(new_gc_allocd_scratch);
1833+
// cleanup
1834+
free(pfl);
1835+
free(new_gc_allocd_scratch);
1836+
}
1837+
uint64_t t_page_walk_end = jl_hrtime();
1838+
gc_num.total_sweep_page_walk_time += t_page_walk_end - t_page_walk_start;
18311839

18321840
#ifdef _P64 // only enable concurrent sweeping on 64bit
18331841
// wake thread up to sweep concurrently
18341842
if (jl_n_sweepthreads > 0) {
18351843
uv_sem_post(&gc_sweep_assists_needed);
18361844
}
18371845
else {
1846+
uint64_t t_madvise_start = jl_hrtime();
18381847
gc_free_pages();
1848+
uint64_t t_madvise_end = jl_hrtime();
1849+
gc_num.total_sweep_madvise_time += t_madvise_end - t_madvise_start;
18391850
}
18401851
#else
18411852
gc_free_pages();

src/gc.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ typedef struct {
8383
uint64_t sweep_time;
8484
uint64_t mark_time;
8585
uint64_t total_sweep_time;
86+
uint64_t total_sweep_page_walk_time;
87+
uint64_t total_sweep_madvise_time;
88+
uint64_t total_sweep_free_mallocd_memory_time;
8689
uint64_t total_mark_time;
8790
uint64_t last_full_sweep;
8891
uint64_t last_incremental_sweep;

0 commit comments

Comments
 (0)