Skip to content

Commit

Permalink
replace malloc with calloc for array allocation for JuliaLang#9147
Browse files Browse the repository at this point in the history
  • Loading branch information
stevengj committed Nov 28, 2014
1 parent 40fc56b commit 7c9ab1d
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 25 deletions.
4 changes: 1 addition & 3 deletions src/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
a->type = atype;
a->how = 0;
data = (char*)a + doffs;
if (tot > 0 && !isunboxed) {
if (tot > 0) {
memset(data, 0, tot);
}
}
Expand All @@ -95,8 +95,6 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
a->how = 2;
data = jl_gc_managed_malloc(tot);
jl_gc_track_malloced_array(a);
if (!isunboxed)
memset(data, 0, tot);
JL_GC_POP();
}

Expand Down
39 changes: 17 additions & 22 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,27 +111,27 @@ static size_t total_freed_bytes=0;
// malloc wrappers, aligned allocation

#ifdef _P64
#define malloc_a16(sz) malloc(((sz)+15)&-16)
#define calloc_a16(sz) calloc(1,((sz)+15)&-16)

This comment has been minimized.

Copy link
@chriselrod

chriselrod Oct 15, 2021

64 byte alignment would be important for performance for larger arrays.
Julia allocates arrays with 64 byte alignment by default if they exceed a certain size threshold.

But I don't know all the internal details. IIRC, it was handled elsewhere.

This comment has been minimized.

Copy link
@chriselrod

chriselrod Oct 15, 2021

https://github.com/JuliaLang/julia/blob/b8ed1ae9e759949ceef1cf6974cb2e9c03394d50/src/array.c#L120-L123
Not sure how that code interacts with the gc code here. I'd have to spend more time looking at it.

This comment has been minimized.

Copy link
@mkitti

mkitti Oct 16, 2021

@chriselrod, I think the most recent attempt at this was JuliaLang#22953

This comment has been minimized.

Copy link
@stevengj

stevengj Oct 16, 2021

Author Owner

Another option would be to implement our own calloc similar to musl: https://github.com/ifduyue/musl/blob/cfdfd5ea3ce14c6abf7fb22a531f3d99518b5a1b/src/malloc/calloc.c

As I understand it, it relies on the fact that the zero pages (used automatically by the OS for newly allocated memory pages) are copy-on-write, so it reads through the allocated memory on 4k boundaries (in-cache for reading the zero page over and over) and only calls memset when it finds non-zero words.

The advantage of this is that we can implement it ourselves using posix_memalign etc. Some investigation would be required to be sure that the same approach works on Windows and Mac, and to check the performance.

Actually on Mac you don't need to do anything because calloc is already 16-byte aligned, I think. And on Windows, they don't have a posix_memalign function — their _aligned_malloc essentially works the same as my calloc_a16 function here, so there is no penalty in just using our own.

This comment has been minimized.

Copy link
@mkitti

This comment has been minimized.

Copy link
@stevengj

stevengj Oct 16, 2021

Author Owner

In other words, if we implement the musl solution on Linux and BSD, then that should be it; Mac and Windows are done.

#define free_a16(p) free(p)

#elif defined(_OS_WINDOWS_) /* 32-bit OS is implicit here. */
#define malloc_a16(sz) _aligned_malloc(sz?((sz)+15)&-16:1, 16)
#define free_a16(p) _aligned_free(p)

#elif defined(__APPLE__)
#define malloc_a16(sz) malloc(((sz)+15)&-16)
#define calloc_a16(sz) calloc(1,((sz)+15)&-16)
#define free_a16(p) free(p)

#else
static inline void *malloc_a16(size_t sz)
static inline void *calloc_a16(size_t n)
{
void *ptr;
if (posix_memalign(&ptr, 16, (sz+15)&-16))
return NULL;
return ptr;
p = (void *) (((uintptr_t) p0 + 16) & (~((uintptr_t) (16 - 1))));
*((void **) p - 1) = p0;
return p;
}
/* NOTE(review): the body of this function appears to have been lost in
 * page extraction — as written it falls off the end of a non-void
 * function, which is undefined behavior if the caller uses the result.
 * Presumably it should reallocate while preserving the 16-byte-alignment
 * and saved-pointer protocol used by calloc_a16/free_a16 — confirm
 * against the original source before use. */
static inline void *realloc_a16(void *p, size_t n)
{
}
/* Release a block obtained from calloc_a16. The word immediately
 * before the aligned pointer holds the address originally returned by
 * the underlying allocator; recover and free that. p == NULL must be
 * skipped explicitly (not left to free(NULL)) because a null block has
 * no saved-pointer slot to read.
 *
 * The stray `#define free_a16(p) free(p)` that followed this function
 * was leftover deleted-line diff residue: it would macro-shadow every
 * later call and free the aligned pointer instead of the real
 * allocation, so it is removed. */
static inline void free_a16(void *p)
{
    if (p) free(*((void **) p - 1));
}

#endif

DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
Expand Down Expand Up @@ -168,7 +168,7 @@ void *jl_gc_managed_malloc(size_t sz)
if (allocd_bytes > collect_interval)
jl_gc_collect();
sz = (sz+15) & -16;
void *b = malloc_a16(sz);
void *b = calloc_a16(sz);
if (b == NULL)
jl_throw(jl_memory_exception);
allocd_bytes += sz;
Expand All @@ -183,16 +183,11 @@ void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz, int isaligned)
void *b;
#ifdef _P64
b = realloc(d, sz);
#elif defined(_OS_WINDOWS_)
if (isaligned)
b = _aligned_realloc(d, sz, 16);
else
b = realloc(d, sz);
#elif defined(__APPLE__)
b = realloc(d, sz);
#else
// TODO better aligned realloc here
b = malloc_a16(sz);
b = calloc_a16(sz);
if (b != NULL) {
memcpy(b, d, oldsz);
if (isaligned) free_a16(d); else free(d);
Expand Down Expand Up @@ -348,7 +343,7 @@ static void *alloc_big(size_t sz)
if (sz+offs+15 < offs+15) // overflow in adding offs, size was "negative"
jl_throw(jl_memory_exception);
size_t allocsz = (sz+offs+15) & -16;
bigval_t *v = (bigval_t*)malloc_a16(allocsz);
bigval_t *v = (bigval_t*)calloc_a16(allocsz);
allocd_bytes += allocsz;
if (v == NULL)
jl_throw(jl_memory_exception);
Expand Down Expand Up @@ -462,7 +457,7 @@ static void add_page(pool_t *p)
gcpage_t *pg = (gcpage_t*)mmap(NULL, sizeof(gcpage_t), PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
#else
gcpage_t *pg = (gcpage_t*)malloc_a16(sizeof(gcpage_t));
gcpage_t *pg = (gcpage_t*)calloc_a16(sizeof(gcpage_t));
#endif
if (pg == NULL)
jl_throw(jl_memory_exception);
Expand Down
5 changes: 5 additions & 0 deletions test/core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1962,3 +1962,8 @@ function f9134()
end
end
@test_throws UndefVarError f9134()

# issue #9147
# issue #9147: freshly allocated arrays must come back zero-initialized
for len = 1:1000
    a = Array(Int, len)
    @test all(a .== 0)
end

0 comments on commit 7c9ab1d

Please sign in to comment.