Skip to content

Commit c8f56c6

Browse files
committed
cleanup remset logic a bit (JuliaLang#55021)
I think that keeping a single `remset` (instead of two and keep alternating between them) should be a bit easier to understand and possibly even a bit faster (since we will be accessing the `remset` only once), though that should be a very small difference.
1 parent a19c7b1 commit c8f56c6

File tree

2 files changed

+39
-54
lines changed

2 files changed

+39
-54
lines changed

src/gc.c

Lines changed: 37 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1895,7 +1895,7 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
18951895
// which is not idempotent. See comments in https://github.com/JuliaLang/julia/issues/50419
18961896
uintptr_t header = jl_atomic_fetch_and_relaxed((_Atomic(uintptr_t) *)&o->header, ~GC_OLD);
18971897
if (header & GC_OLD) { // write barrier has not been triggered in this object yet
1898-
arraylist_push(ptls->heap.remset, (jl_value_t*)ptr);
1898+
arraylist_push(&ptls->heap.remset, (jl_value_t*)ptr);
18991899
ptls->heap.remset_nptr++; // conservative
19001900
}
19011901
}
@@ -2002,7 +2002,7 @@ STATIC_INLINE void gc_mark_push_remset(jl_ptls_t ptls, jl_value_t *obj,
20022002
{
20032003
if (__unlikely((nptr & 0x3) == 0x3)) {
20042004
ptls->heap.remset_nptr += nptr >> 2;
2005-
arraylist_t *remset = ptls->heap.remset;
2005+
arraylist_t *remset = &ptls->heap.remset;
20062006
size_t len = remset->len;
20072007
if (__unlikely(len >= remset->max)) {
20082008
arraylist_push(remset, obj);
@@ -3187,23 +3187,6 @@ void gc_mark_clean_reclaim_sets(void)
31873187
}
31883188
}
31893189

3190-
static void gc_premark(jl_ptls_t ptls2)
3191-
{
3192-
arraylist_t *remset = ptls2->heap.remset;
3193-
ptls2->heap.remset = ptls2->heap.last_remset;
3194-
ptls2->heap.last_remset = remset;
3195-
ptls2->heap.remset->len = 0;
3196-
ptls2->heap.remset_nptr = 0;
3197-
// avoid counting remembered objects
3198-
// in `perm_scanned_bytes`
3199-
size_t len = remset->len;
3200-
void **items = remset->items;
3201-
for (size_t i = 0; i < len; i++) {
3202-
jl_value_t *item = (jl_value_t *)items[i];
3203-
jl_astaggedvalue(item)->bits.gc = GC_OLD_MARKED;
3204-
}
3205-
}
3206-
32073190
static void gc_queue_thread_local(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
32083191
{
32093192
jl_task_t *task;
@@ -3247,14 +3230,29 @@ static void gc_queue_bt_buf(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
32473230
}
32483231
}
32493232

3250-
static void gc_queue_remset(jl_ptls_t ptls, jl_ptls_t ptls2)
3233+
static void gc_queue_remset(jl_gc_markqueue_t *mq, jl_ptls_t ptls2)
32513234
{
3252-
size_t len = ptls2->heap.last_remset->len;
3253-
void **items = ptls2->heap.last_remset->items;
3235+
void **items = ptls2->heap.remset.items;
3236+
size_t len = ptls2->heap.remset.len;
32543237
for (size_t i = 0; i < len; i++) {
3255-
// Tag the pointer to indicate it's in the remset
3256-
jl_value_t *v = (jl_value_t *)((uintptr_t)items[i] | GC_REMSET_PTR_TAG);
3257-
gc_ptr_queue_push(&ptls->mark_queue, v);
3238+
void *_v = items[i];
3239+
jl_astaggedvalue(_v)->bits.gc = GC_OLD_MARKED;
3240+
jl_value_t *v = (jl_value_t *)((uintptr_t)_v | GC_REMSET_PTR_TAG);
3241+
gc_ptr_queue_push(mq, v);
3242+
}
3243+
// Don't forget to clear the remset
3244+
ptls2->heap.remset.len = 0;
3245+
ptls2->heap.remset_nptr = 0;
3246+
}
3247+
3248+
static void gc_check_all_remsets_are_empty(void)
3249+
{
3250+
for (int i = 0; i < gc_n_threads; i++) {
3251+
jl_ptls_t ptls2 = gc_all_tls_states[i];
3252+
if (ptls2 != NULL) {
3253+
assert(ptls2->heap.remset.len == 0);
3254+
assert(ptls2->heap.remset_nptr == 0);
3255+
}
32583256
}
32593257
}
32603258

@@ -3456,15 +3454,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
34563454
JL_PROBE_GC_MARK_BEGIN();
34573455
{
34583456
JL_TIMING(GC, GC_Mark);
3459-
3460-
// 1. fix GC bits of objects in the remset.
3461-
assert(gc_n_threads);
3462-
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
3463-
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
3464-
if (ptls2 != NULL)
3465-
gc_premark(ptls2);
3466-
}
3467-
34683457
assert(gc_n_threads);
34693458
int single_threaded_mark = (jl_n_markthreads == 0 || gc_heap_snapshot_enabled);
34703459
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
@@ -3477,17 +3466,18 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
34773466
mq_dest = &ptls_dest->mark_queue;
34783467
}
34793468
if (ptls2 != NULL) {
3480-
// 2.1. mark every thread local root
3469+
// 1.1. mark every thread local root
34813470
gc_queue_thread_local(mq_dest, ptls2);
3482-
// 2.2. mark any managed objects in the backtrace buffer
3471+
// 1.2. mark any managed objects in the backtrace buffer
34833472
// TODO: treat these as roots for gc_heap_snapshot_record
34843473
gc_queue_bt_buf(mq_dest, ptls2);
3485-
// 2.3. mark every object in the `last_remsets` and `rem_binding`
3486-
gc_queue_remset(ptls_dest, ptls2);
3474+
// 1.3. mark every object in the remset
3475+
gc_queue_remset(mq_dest, ptls2);
34873476
}
34883477
}
3478+
gc_check_all_remsets_are_empty();
34893479

3490-
// 3. walk roots
3480+
// 2. walk roots
34913481
gc_mark_roots(mq);
34923482
if (gc_cblist_root_scanner) {
34933483
gc_invoke_callbacks(jl_gc_cb_root_scanner_t,
@@ -3497,7 +3487,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
34973487
gc_mark_loop_barrier();
34983488
gc_mark_clean_reclaim_sets();
34993489

3500-
// 4. check for objects to finalize
3490+
// 3. check for objects to finalize
35013491
clear_weak_refs();
35023492
// Record the length of the marked list since we need to
35033493
// mark the object moved to the marked list from the
@@ -3560,7 +3550,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
35603550
gc_num.total_allocd += gc_num.allocd;
35613551
if (!prev_sweep_full)
35623552
promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes;
3563-
// 5. next collection decision
3553+
// 4. next collection decision
35643554
int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(actual_allocd/10));
35653555
int nptr = 0;
35663556
assert(gc_n_threads);
@@ -3619,7 +3609,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
36193609
last_long_collect_interval = gc_num.interval;
36203610
}
36213611
scanned_bytes = 0;
3622-
// 6. start sweeping
3612+
// 5. start sweeping
36233613
uint64_t start_sweep_time = jl_hrtime();
36243614
JL_PROBE_GC_SWEEP_BEGIN(sweep_full);
36253615
{
@@ -3658,21 +3648,21 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
36583648
}
36593649

36603650
// sweeping is over
3661-
// 7. if it is a quick sweep, put back the remembered objects in queued state
3651+
// 6. if it is a quick sweep, put back the remembered objects in queued state
36623652
// so that we don't trigger the barrier again on them.
36633653
assert(gc_n_threads);
36643654
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
36653655
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
36663656
if (ptls2 == NULL)
36673657
continue;
36683658
if (!sweep_full) {
3669-
for (int i = 0; i < ptls2->heap.remset->len; i++) {
3670-
void *ptr = ptls2->heap.remset->items[i];
3659+
for (int i = 0; i < ptls2->heap.remset.len; i++) {
3660+
void *ptr = ptls2->heap.remset.items[i];
36713661
jl_astaggedvalue(ptr)->bits.gc = GC_MARKED;
36723662
}
36733663
}
36743664
else {
3675-
ptls2->heap.remset->len = 0;
3665+
ptls2->heap.remset.len = 0;
36763666
}
36773667
}
36783668

@@ -3887,10 +3877,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
38873877
heap->mallocarrays = NULL;
38883878
heap->mafreelist = NULL;
38893879
heap->big_objects = NULL;
3890-
heap->remset = &heap->_remset[0];
3891-
heap->last_remset = &heap->_remset[1];
3892-
arraylist_new(heap->remset, 0);
3893-
arraylist_new(heap->last_remset, 0);
3880+
arraylist_new(&heap->remset, 0);
38943881
arraylist_new(&ptls->finalizers, 0);
38953882
arraylist_new(&ptls->sweep_objs, 0);
38963883

src/julia_threads.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,12 +153,10 @@ typedef struct {
153153
// variables for tracking big objects
154154
struct _bigval_t *big_objects;
155155

156-
// variables for tracking "remembered set"
157-
arraylist_t _remset[2]; // contains jl_value_t*
158156
// lower bound of the number of pointers inside remembered values
159157
int remset_nptr;
160-
arraylist_t *remset;
161-
arraylist_t *last_remset;
158+
// remembered set
159+
arraylist_t remset;
162160

163161
// variables for allocating objects from pools
164162
#define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h`

0 commit comments

Comments
 (0)