Skip to content

Commit

Permalink
Squashed commit of the following:
Browse files Browse the repository at this point in the history
Fix JuliaLang#42673 by checking for unsigned integer wrapping for jl_*calloc*
Reused former isaligned tagged bit for howtofree tagged bit
Retained all existing functions
jl_gc_*_aligned defaults to jl_* since macOS guarantees page alignment
Offset the aligned pointer so that the original (void *) p0 and the size_t align can be stored immediately before it

commit 842bf06e785d3be97558083abe181505c6840549
Author: Mark Kittisopikul <[email protected]>
Date:   Sun Oct 17 02:20:11 2021 -0400

    Undo renaming of jl_calloc, jl_free, and jl_realloc

commit 9594400d207a9f8baac832af5dd0830165d1192e
Author: Daniel Matz <[email protected]>
Date:   Fri Nov 11 18:31:54 2016 -0600

    Remove the isaligned array flag

commit 4bb083a5f6b44766632dd394397257ebafc91726
Author: Daniel Matz <[email protected]>
Date:   Fri Nov 4 08:59:43 2016 -0500

    Disambiguate jl_malloc from jl_malloc_aligned

    Add gc counted, aligned malloc
  • Loading branch information
mkitti committed Oct 18, 2021
1 parent b8ed1ae commit f36739b
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 19 deletions.
51 changes: 38 additions & 13 deletions src/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,18 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
assert(((size_t)a & 15) == 0);
// No allocation or safepoint allowed after this
a->flags.how = 0;
a->flags.howtofree = 0;
data = (char*)a + doffs;
}
else {
data = jl_gc_managed_malloc(tot);
tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT); // align whole object
data = jl_gc_malloc_aligned(tot, JL_CACHE_BYTE_ALIGNMENT);
// Allocate the Array **after** allocating the data
// to make sure the array is still young
a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
// No allocation or safepoint allowed after this
a->flags.how = 2;
a->flags.howtofree = 1; // use jl_gc_free_aligned
jl_gc_track_malloced_array(ct->ptls, a);
}
a->flags.pooled = tsz <= GC_MAX_SZCLASS;
Expand All @@ -154,7 +157,6 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
a->flags.hasptr = hasptr;
a->elsize = elsz;
a->flags.isshared = 0;
a->flags.isaligned = 1;
a->offset = 0;
if (ndims == 1) {
a->nrows = nel;
Expand Down Expand Up @@ -233,7 +235,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
a->flags.ndims = ndims;
a->offset = 0;
a->data = NULL;
a->flags.isaligned = data->flags.isaligned;
a->flags.howtofree = data->flags.howtofree;
jl_array_t *owner = (jl_array_t*)jl_array_owner(data);
jl_value_t *eltype = jl_tparam0(atype);
size_t elsz = 0, align = 0;
Expand Down Expand Up @@ -313,7 +315,7 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str)
a->flags.ndims = 1;
a->offset = 0;
a->data = jl_string_data(str);
a->flags.isaligned = 0;
a->flags.howtofree = 0;
a->elsize = 1;
a->flags.ptrarray = 0;
a->flags.hasptr = 0;
Expand Down Expand Up @@ -367,9 +369,10 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
a->flags.hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0);
a->flags.ndims = 1;
a->flags.isshared = 1;
a->flags.isaligned = 0; // TODO: allow passing memalign'd buffers
a->flags.howtofree = 0;
if (own_buffer) {
a->flags.how = 2;
a->flags.howtofree = 0;
jl_gc_track_malloced_array(ct->ptls, a);
jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 1 : 0));
}
Expand Down Expand Up @@ -435,7 +438,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
a->flags.ndims = ndims;
a->offset = 0;
a->flags.isshared = 1;
a->flags.isaligned = 0;
a->flags.howtofree = 0;
if (own_buffer) {
a->flags.how = 2;
jl_gc_track_malloced_array(ct->ptls, a);
Expand Down Expand Up @@ -708,8 +711,13 @@ static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen)
if (a->flags.how == 2) {
// already malloc'd - use realloc
char *olddata = (char*)a->data - oldoffsnb;
a->data = jl_gc_managed_realloc(olddata, nbytes, oldnbytes,
a->flags.isaligned, (jl_value_t*)a);
assert(a->flags.howtofree == 0 || a->flags.howtofree == 1);
//Compare to jl_array_shrink
if (a->flags.howtofree == 0)
a->data = jl_gc_managed_realloc(olddata, nbytes, oldnbytes,
0, (jl_value_t*)a);
else if (a->flags.howtofree == 1)
a->data = jl_gc_realloc_aligned(olddata, nbytes, oldnbytes, (jl_value_t*)a);
}
else if (a->flags.how == 3 && jl_is_string(jl_array_data_owner(a)) && !isbitsunion) {
// if data is in a String, keep it that way
Expand All @@ -727,11 +735,18 @@ static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen)
}
else {
newbuf = 1;
if (nbytes >= MALLOC_THRESH) {
a->data = jl_gc_managed_malloc(nbytes);
if (
#ifdef _P64
nbytes >= MALLOC_THRESH
#else
elsz > 4
#endif
) {
a->data = jl_gc_malloc_aligned(nbytes, JL_CACHE_BYTE_ALIGNMENT);
//a-> data = jl_gc_managed_malloc(nbytes);
jl_gc_track_malloced_array(ct->ptls, a);
a->flags.how = 2;
a->flags.isaligned = 1;
a->flags.howtofree = 1; // use jl_gc_free_aligned from jl_gc_free_array
}
else {
a->data = jl_gc_alloc_buf(ct->ptls, nbytes);
Expand Down Expand Up @@ -1053,8 +1068,18 @@ STATIC_INLINE void jl_array_shrink(jl_array_t *a, size_t dec)
memcpy(typetagdata, jl_array_typetagdata(a), a->nrows);
}
size_t oldoffsnb = a->offset * elsz;
a->data = ((char*)jl_gc_managed_realloc(originalptr, newbytes, oldnbytes,
a->flags.isaligned, (jl_value_t*) a)) + oldoffsnb;
//a->data = ((char*)jl_gc_managed_realloc(originalptr, newbytes, oldnbytes,
// a->flags.howtofree, (jl_value_t*) a)) + oldoffsnb;

//a->flags.isaligned used to be passed to jl_gc_managed_realloc here
//Compare to array_resize_buffer
assert(a->flags.howtofree == 0 || a->flags.howtofree == 1);
if (a->flags.howtofree == 0)
a->data = ((char*)jl_gc_managed_realloc(originalptr, newbytes, oldnbytes,
0, (jl_value_t*) a)) + oldoffsnb;
else if (a->flags.howtofree == 1)
a->data = ((char*)jl_gc_realloc_aligned(originalptr, newbytes, oldnbytes,
(jl_value_t*)a)) + oldoffsnb;
a->maxsize -= dec;
if (isbitsunion) {
newtypetagdata = jl_array_typetagdata(a);
Expand Down
80 changes: 75 additions & 5 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1113,10 +1113,11 @@ static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT
{
if (a->flags.how == 2) {
char *d = (char*)a->data - a->offset*a->elsize;
if (a->flags.isaligned)
jl_free_aligned(d);
else
assert(a->flags.howtofree == 0 || a->flags.howtofree == 1);
if (a->flags.howtofree == 0)
free(d);
else if (a->flags.howtofree == 1)
jl_gc_free_aligned(d);
gc_num.freed += jl_array_nbytes(a);
}
}
Expand Down Expand Up @@ -3461,8 +3462,8 @@ JL_DLLEXPORT void *jl_malloc(size_t sz)
return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
}

JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
{
//_unchecked_calloc does not check for potential overflow of nm*sz
static inline void *_unchecked_calloc(size_t nm, size_t sz) {
size_t nmsz = nm*sz;
int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1);
if (p == NULL)
Expand All @@ -3471,6 +3472,13 @@ JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
}

// Overflow-checked front end for _unchecked_calloc.
//
// nm: number of elements
// sz: size of one element in bytes
//
// Returns NULL without allocating when `nm * sz` would wrap around size_t;
// otherwise returns whatever _unchecked_calloc(nm, sz) returns.
JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
{
    // A zero element size can never overflow, and must not reach the division
    // below (SIZE_MAX / 0 is undefined behavior).
    if (sz != 0 && nm > SIZE_MAX / sz)
        return NULL;
    return _unchecked_calloc(nm, sz);
}

JL_DLLEXPORT void jl_free(void *p)
{
if (p != NULL) {
Expand Down Expand Up @@ -3525,6 +3533,68 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
errno = last_errno;
return b;
}
// Allocate `sz` bytes at an address that is a multiple of `align`.
//
// sz:    number of usable bytes requested
// align: requested alignment in bytes; must be a non-zero power of two
//
// On non-Apple platforms the block is over-allocated with jl_malloc and the
// returned pointer `p` is preceded by a two-word header:
//   *((void **) p - 1)  = p0, the pointer originally returned by jl_malloc
//   *((size_t *) p - 2) = align
// The result must therefore be resized/released with jl_gc_realloc_aligned /
// jl_gc_free_aligned, never with jl_realloc / jl_free directly.
//
// Returns NULL if `align` is invalid, if the padded size would overflow
// size_t, or if jl_malloc fails.
//
// TODO add special casing for 64 bit systems when 16 byte alignment is requested
// TODO enforce the posix_memalign requirement (multiple of sizeof(void *))?
void *jl_gc_malloc_aligned(size_t sz, size_t align)
{
#if defined(__APPLE__)
    // macOS: plain jl_malloc is used and no header is written.
    // NOTE(review): jl_malloc returns its block advanced by 16 bytes
    // (`p + 2`), so this path presumably only guarantees 16-byte alignment
    // regardless of `align` -- confirm this is sufficient for callers.
    (void)align;
    return jl_malloc(sz);
#else
    // The mask arithmetic below is only meaningful for powers of two.
    if (align == 0 || (align & (align - 1)) != 0)
        return NULL;
    // Worst-case padding: alignment slack plus room for the two header words.
    size_t offset = align - 1 + sizeof(void *) + sizeof(size_t);
    if (sz > SIZE_MAX - offset)
        return NULL; // sz + offset would wrap around
    void *p0 = jl_malloc(sz + offset);
    if (!p0)
        return NULL;
    // Round (p0 + offset) down to a multiple of align; this leaves at least
    // sizeof(void *) + sizeof(size_t) bytes between p0 and p for the header.
    void *p = (void *) (((uintptr_t) p0 + offset) & (~((uintptr_t) (align - 1))));
    *((void **) p - 1) = p0;
    *((size_t *) p - 2) = align;
    return p;
#endif
}

// Allocate a zero-initialized array of `nm` elements of `sz` bytes each at an
// address that is a multiple of `align`.
//
// nm:    number of elements
// sz:    size of one element in bytes
// align: requested alignment in bytes; must be a non-zero power of two
//
// On non-Apple platforms the returned pointer `p` is preceded by a header:
//   *((void **) p - 1)  = p0, the pointer returned by the underlying calloc
//   *((size_t *) p - 2) = align
//
// Returns NULL if `align` is invalid, if `nm * sz` plus the padding would
// overflow size_t, or if the underlying allocation fails.
void *jl_gc_calloc_aligned(size_t nm, size_t sz, size_t align)
{
#if defined(__APPLE__)
    // macOS: forward both the element count and the element size to jl_calloc.
    (void)align;
    return jl_calloc(nm, sz);
#else
    // The mask arithmetic below is only meaningful for powers of two.
    if (align == 0 || (align & (align - 1)) != 0)
        return NULL;
    // Worst-case padding: alignment slack plus room for the two header words.
    size_t offset = align - 1 + sizeof(void *) + sizeof(size_t);
    // sz == 0 cannot overflow and must not reach the division.
    if (sz != 0 && nm > (SIZE_MAX - offset) / sz)
        return NULL;
    // NOTE(review): _unchecked_calloc adds JL_SMALL_BYTE_ALIGNMENT to the
    // size it is given without its own overflow check -- confirm whether the
    // bound above should reserve room for that as well.
    void *p0 = _unchecked_calloc(1, nm * sz + offset);
    if (!p0)
        return NULL;
    void *p = (void *) (((uintptr_t) p0 + offset) & (~((uintptr_t) (align - 1))));
    *((void **) p - 1) = p0;
    *((size_t *) p - 2) = align;
    return p;
#endif
}

// Resize a block obtained from jl_gc_malloc_aligned / jl_gc_calloc_aligned,
// keeping the alignment that was recorded when it was first allocated.
//
// p:     pointer previously returned by one of the aligned allocators; on
//        non-Apple platforms it must be non-NULL because the header stored in
//        front of it is read unconditionally
// sz:    new usable size in bytes
// oldsz: usable size currently held at `p`; assumed to be what the caller
//        originally requested -- TODO confirm at the call sites
// owner: unused here
//
// Returns the new aligned pointer, or NULL if the padded size would overflow
// size_t or jl_realloc fails (the old block is left untouched in that case,
// assuming jl_realloc follows the usual realloc contract).
//
// TODO when resizing an array, you actually only need to copy the portion
// that is being used, which can have some savings
// TODO how do we know when align is 16 bytes on P64?
// See older jl_realloc_aligned, perhaps?
void *jl_gc_realloc_aligned(void *p, size_t sz, size_t oldsz, jl_value_t *owner)
{
    (void)owner;
#if defined(__APPLE__)
    (void)oldsz;
    return jl_realloc(p, sz);
#else
    // Recover the header written by the aligned allocator.
    void *p0 = *((void **) p - 1);
    size_t align = *((size_t *) p - 2);
    size_t offset = align - 1 + sizeof(void *) + sizeof(size_t);
    if (sz > SIZE_MAX - offset)
        return NULL; // sz + offset would wrap around
    // Distance of the payload from the start of the underlying block. The
    // reallocation preserves the payload at this distance from the new base.
    size_t old_delta = (size_t)((char *) p - (char *) p0);
    char *p0new = (char *) jl_realloc(p0, sz + offset);
    if (!p0new)
        return NULL;
    char *pnew = (char *) (((uintptr_t) p0new + offset) & (~((uintptr_t) (align - 1))));
    // The new base may have a different residue modulo `align`, in which case
    // the aligned address sits at a different distance from the base than the
    // payload does. Slide the payload into place; this must happen before the
    // header is rewritten, since the header slot may overlap the old payload.
    char *moved = p0new + old_delta;
    if (pnew != moved) {
        size_t keep = oldsz < sz ? oldsz : sz;
        memmove(pnew, moved, keep);
    }
    *((void **) pnew - 1) = p0new;
    *((size_t *) pnew - 2) = align;
    return pnew;
#endif
}

// Release a block obtained from jl_gc_malloc_aligned, jl_gc_calloc_aligned or
// jl_gc_realloc_aligned. Passing NULL is a no-op.
void jl_gc_free_aligned(void *p)
{
#if defined(__APPLE__)
    // macOS: the aligned allocators hand out plain jl_malloc pointers.
    // (A void function may not `return` an expression, so call and fall out.)
    jl_free(p);
#else
    // The pointer originally returned by the underlying allocator is stored
    // in the word immediately before `p`; that is what has to be freed.
    if (p)
        jl_free(*((void **) p - 1));
#endif
}

static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz,
int isaligned, jl_value_t *owner, int8_t can_collect)
Expand Down
6 changes: 5 additions & 1 deletion src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ typedef struct {
uint16_t ptrarray:1; // representation is pointer array
uint16_t hasptr:1; // representation has embedded pointers
uint16_t isshared:1; // data is shared by multiple Arrays
uint16_t isaligned:1; // data allocated with memalign
uint16_t howtofree:1; // 0 = free, 1 = jl_gc_free_aligned; previously, this bit was called aligned
} jl_array_flags_t;

JL_EXTENSION typedef struct {
Expand Down Expand Up @@ -893,6 +893,10 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_
JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz);
JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
int isaligned, jl_value_t *owner);
// Aligned allocation helpers (defined in src/gc.c).
// When __APPLE__ is defined they forward to jl_malloc / jl_calloc /
// jl_realloc / jl_free. Otherwise the returned pointer is preceded by a small
// header holding the underlying allocation pointer and `align`, so such
// pointers must be resized with jl_gc_realloc_aligned and released with
// jl_gc_free_aligned.
void *jl_gc_malloc_aligned(size_t sz, size_t align);
void *jl_gc_calloc_aligned(size_t nm, size_t sz, size_t align);
void *jl_gc_realloc_aligned(void *p, size_t sz, size_t oldsz, jl_value_t *owner);
void jl_gc_free_aligned(void *p);

// object accessors -----------------------------------------------------------

Expand Down

0 comments on commit f36739b

Please sign in to comment.