Skip to content

Commit

Permalink
Save a couple loads/stores in sweep pages (#49263)
Browse files Browse the repository at this point in the history
We did a load/store on every iteration. Keep a temporary in a register
instead. It's a very small difference but it's visible in vtune.
  • Loading branch information
gbaraldi authored Apr 20, 2023
1 parent 1f94d2e commit f84fb5b
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 17 deletions.
4 changes: 2 additions & 2 deletions src/gc-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -563,11 +563,11 @@ JL_NO_ASAN static void gc_scrub_range(char *low, char *high)
// Find the age bit
char *page_begin = gc_page_data(tag) + GC_PAGE_OFFSET;
int obj_id = (((char*)tag) - page_begin) / osize;
uint8_t *ages = pg->ages + obj_id / 8;
uint32_t *ages = pg->ages + obj_id / 32;
// Force this to be a young object to save some memory
// (especially on 32bit where it's more likely to have pointer-like
// bit patterns)
*ages &= ~(1 << (obj_id % 8));
*ages &= ~(1 << (obj_id % 32));
memset(tag, 0xff, osize);
// set mark to GC_MARKED (young and marked)
tag->bits.gc = GC_MARKED;
Expand Down
31 changes: 17 additions & 14 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -976,8 +976,8 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
page->has_young = 1;
char *page_begin = gc_page_data(o) + GC_PAGE_OFFSET;
int obj_id = (((char*)o) - page_begin) / page->osize;
uint8_t *ages = page->ages + obj_id / 8;
jl_atomic_fetch_and_relaxed((_Atomic(uint8_t)*)ages, ~(1 << (obj_id % 8)));
uint32_t *ages = page->ages + obj_id / 32;
jl_atomic_fetch_and_relaxed((_Atomic(uint32_t)*)ages, ~(1 << (obj_id % 32)));
}
}
objprofile_count(jl_typeof(jl_valueof(o)),
Expand Down Expand Up @@ -1406,7 +1406,7 @@ static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
jl_ptls_t ptls = jl_current_task->ptls;
jl_gc_pagemeta_t *pg = jl_gc_alloc_page();
pg->osize = p->osize;
pg->ages = (uint8_t*)malloc_s(GC_PAGE_SZ / 8 / p->osize + 1);
pg->ages = (uint32_t*)malloc_s(LLT_ALIGN(GC_PAGE_SZ / 8 / p->osize + 1, sizeof(uint32_t)));
pg->thread_n = ptls->tid;
jl_taggedvalue_t *fl = reset_page(ptls, p, pg, NULL);
p->newpages = fl;
Expand Down Expand Up @@ -1506,7 +1506,7 @@ int64_t lazy_freed_pages = 0;
static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_taggedvalue_t **pfl, int sweep_full, int osize) JL_NOTSAFEPOINT
{
char *data = pg->data;
uint8_t *ages = pg->ages;
uint32_t *ages = pg->ages;
jl_taggedvalue_t *v = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
char *lim = (char*)v + GC_PAGE_SZ - GC_PAGE_OFFSET - osize;
size_t old_nfree = pg->nfree;
Expand Down Expand Up @@ -1557,18 +1557,25 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
int16_t prev_nold = 0;
int pg_nfree = 0;
jl_taggedvalue_t **pfl_begin = NULL;
uint8_t msk = 1; // mask for the age bit in the current age byte
uint32_t msk = 1; // mask for the age bit in the current age byte
uint32_t age = *ages;
while ((char*)v <= lim) {
if (!msk) {
msk = 1;
*ages = age;
ages++;
age = *ages;
}
int bits = v->bits.gc;
if (!gc_marked(bits)) {
*pfl = v;
pfl = &v->next;
pfl_begin = pfl_begin ? pfl_begin : pfl;
pg_nfree++;
*ages &= ~msk;
age &= ~msk;
}
else { // marked young or old
if (*ages & msk || bits == GC_OLD_MARKED) { // old enough
if (age & msk || bits == GC_OLD_MARKED) { // old enough
// `!age && bits == GC_OLD_MARKED` is possible for
// non-first-class objects like array buffers
// (they may get promoted by jl_gc_wb_buf for example,
Expand All @@ -1584,17 +1591,13 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
has_young = 1;
}
has_marked |= gc_marked(bits);
*ages |= msk;
age |= msk;
freedall = 0;
}
v = (jl_taggedvalue_t*)((char*)v + osize);
msk <<= 1;
if (!msk) {
msk = 1;
ages++;
}
}

*ages = age;
assert(!freedall);
pg->has_marked = has_marked;
pg->has_young = has_young;
Expand Down Expand Up @@ -4017,7 +4020,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
goto valid_object;
// We know now that the age bit reflects liveness status during
// the last sweep and that the cell has not been reused since.
if (!(meta->ages[obj_id / 8] & (1 << (obj_id % 8)))) {
if (!(meta->ages[obj_id / 32] & (1 << (obj_id % 32)))) {
return NULL;
}
// Not a freelist entry, therefore a valid object.
Expand Down
3 changes: 2 additions & 1 deletion src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#ifndef JL_GC_H
#define JL_GC_H

#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
Expand Down Expand Up @@ -170,7 +171,7 @@ typedef struct {
uint16_t fl_end_offset; // offset of last free object in this page
uint16_t thread_n; // thread id of the heap that owns this page
char *data;
uint8_t *ages;
uint32_t *ages;
} jl_gc_pagemeta_t;

// Page layout:
Expand Down

0 comments on commit f84fb5b

Please sign in to comment.