@@ -1518,8 +1518,11 @@ STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_pa
15181518// sweep over all memory that is being used and not in a pool
// Calls sweep_malloced_arrays() followed by sweep_big(ptls), and adds the
// jl_hrtime() delta measured around that pair of calls to
// gc_num.total_sweep_free_mallocd_memory_time.
// NOTE(review): `sweep_full` is not referenced anywhere in this body.
15191519static void gc_sweep_other (jl_ptls_t ptls , int sweep_full ) JL_NOTSAFEPOINT
15201520{
1521+ uint64_t t_free_mallocd_memory_start = jl_hrtime (); // timestamp taken before both sweeps
15211522 sweep_malloced_arrays ();
15221523 sweep_big (ptls );
1524+ uint64_t t_free_mallocd_memory_end = jl_hrtime (); // timestamp taken after both sweeps
// Accumulate (+=) rather than overwrite, so the counter holds a running total
// across calls.
1525+ gc_num .total_sweep_free_mallocd_memory_time += t_free_mallocd_memory_end - t_free_mallocd_memory_start ;
15231526}
15241527
15251528static void gc_pool_sync_nfree (jl_gc_pagemeta_t * pg , jl_taggedvalue_t * last ) JL_NOTSAFEPOINT
@@ -1776,66 +1779,74 @@ static void gc_sweep_pool(void)
17761779 }
17771780 }
17781781
1779- // the actual sweeping
1780- jl_gc_padded_page_stack_t * new_gc_allocd_scratch = (jl_gc_padded_page_stack_t * ) calloc_s (n_threads * sizeof (jl_gc_padded_page_stack_t ));
1781- jl_ptls_t ptls = jl_current_task -> ptls ;
1782- gc_sweep_wake_all (ptls , new_gc_allocd_scratch );
1783- gc_sweep_pool_parallel (ptls );
1784- gc_sweep_wait_for_all ();
1785-
1786- // reset half-pages pointers
1787- for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
1788- jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1789- if (ptls2 != NULL ) {
1790- ptls2 -> gc_tls .page_metadata_allocd = new_gc_allocd_scratch [t_i ].stack ;
1791- for (int i = 0 ; i < JL_GC_N_POOLS ; i ++ ) {
1792- jl_gc_pool_t * p = & ptls2 -> gc_tls .heap .norm_pools [i ];
1793- p -> newpages = NULL ;
1782+ uint64_t t_page_walk_start = jl_hrtime ();
1783+ {
1784+ // the actual sweeping
1785+ jl_gc_padded_page_stack_t * new_gc_allocd_scratch = (jl_gc_padded_page_stack_t * ) calloc_s (n_threads * sizeof (jl_gc_padded_page_stack_t ));
1786+ jl_ptls_t ptls = jl_current_task -> ptls ;
1787+ gc_sweep_wake_all (ptls , new_gc_allocd_scratch );
1788+ gc_sweep_pool_parallel (ptls );
1789+ gc_sweep_wait_for_all ();
1790+
1791+ // reset half-pages pointers
1792+ for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
1793+ jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1794+ if (ptls2 != NULL ) {
1795+ ptls2 -> gc_tls .page_metadata_allocd = new_gc_allocd_scratch [t_i ].stack ;
1796+ for (int i = 0 ; i < JL_GC_N_POOLS ; i ++ ) {
1797+ jl_gc_pool_t * p = & ptls2 -> gc_tls .heap .norm_pools [i ];
1798+ p -> newpages = NULL ;
1799+ }
17941800 }
17951801 }
1796- }
17971802
1798- // merge free lists
1799- for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
1800- jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1801- if (ptls2 == NULL ) {
1802- continue ;
1803- }
1804- jl_gc_pagemeta_t * pg = jl_atomic_load_relaxed (& ptls2 -> gc_tls .page_metadata_allocd .bottom );
1805- while (pg != NULL ) {
1806- jl_gc_pagemeta_t * pg2 = pg -> next ;
1807- if (pg -> fl_begin_offset != UINT16_MAX ) {
1808- char * cur_pg = pg -> data ;
1809- jl_taggedvalue_t * fl_beg = (jl_taggedvalue_t * )(cur_pg + pg -> fl_begin_offset );
1810- jl_taggedvalue_t * fl_end = (jl_taggedvalue_t * )(cur_pg + pg -> fl_end_offset );
1811- * pfl [t_i * JL_GC_N_POOLS + pg -> pool_n ] = fl_beg ;
1812- pfl [t_i * JL_GC_N_POOLS + pg -> pool_n ] = & fl_end -> next ;
1803+ // merge free lists
1804+ for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
1805+ jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1806+ if (ptls2 == NULL ) {
1807+ continue ;
1808+ }
1809+ jl_gc_pagemeta_t * pg = jl_atomic_load_relaxed (& ptls2 -> gc_tls .page_metadata_allocd .bottom );
1810+ while (pg != NULL ) {
1811+ jl_gc_pagemeta_t * pg2 = pg -> next ;
1812+ if (pg -> fl_begin_offset != UINT16_MAX ) {
1813+ char * cur_pg = pg -> data ;
1814+ jl_taggedvalue_t * fl_beg = (jl_taggedvalue_t * )(cur_pg + pg -> fl_begin_offset );
1815+ jl_taggedvalue_t * fl_end = (jl_taggedvalue_t * )(cur_pg + pg -> fl_end_offset );
1816+ * pfl [t_i * JL_GC_N_POOLS + pg -> pool_n ] = fl_beg ;
1817+ pfl [t_i * JL_GC_N_POOLS + pg -> pool_n ] = & fl_end -> next ;
1818+ }
1819+ pg = pg2 ;
18131820 }
1814- pg = pg2 ;
18151821 }
1816- }
18171822
1818- // null out terminal pointers of free lists
1819- for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
1820- jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1821- if (ptls2 != NULL ) {
1822- for (int i = 0 ; i < JL_GC_N_POOLS ; i ++ ) {
1823- * pfl [t_i * JL_GC_N_POOLS + i ] = NULL ;
1823+ // null out terminal pointers of free lists
1824+ for (int t_i = 0 ; t_i < n_threads ; t_i ++ ) {
1825+ jl_ptls_t ptls2 = gc_all_tls_states [t_i ];
1826+ if (ptls2 != NULL ) {
1827+ for (int i = 0 ; i < JL_GC_N_POOLS ; i ++ ) {
1828+ * pfl [t_i * JL_GC_N_POOLS + i ] = NULL ;
1829+ }
18241830 }
18251831 }
1826- }
18271832
1828- // cleanup
1829- free (pfl );
1830- free (new_gc_allocd_scratch );
1833+ // cleanup
1834+ free (pfl );
1835+ free (new_gc_allocd_scratch );
1836+ }
1837+ uint64_t t_page_walk_end = jl_hrtime ();
1838+ gc_num .total_sweep_page_walk_time += t_page_walk_end - t_page_walk_start ;
18311839
18321840#ifdef _P64 // only enable concurrent sweeping on 64bit
18331841 // wake thread up to sweep concurrently
18341842 if (jl_n_sweepthreads > 0 ) {
18351843 uv_sem_post (& gc_sweep_assists_needed );
18361844 }
18371845 else {
1846+ uint64_t t_madvise_start = jl_hrtime ();
18381847 gc_free_pages ();
1848+ uint64_t t_madvise_end = jl_hrtime ();
1849+ gc_num .total_sweep_madvise_time += t_madvise_end - t_madvise_start ;
18391850 }
18401851#else
18411852 gc_free_pages ();
0 commit comments