Don't do 64-bit atomics in 32-bit runtimes.
The 64-bit atomics were all on amounts of memory, so a uintptr_t should
be safe. One bit of dodginess is that memory_total could possibly exceed
32 bits even though there is never more than a 32-bit amount of memory
allocated at any one time.
abadams committed Jul 16, 2024

1 parent af4af91 commit 8002729
Showing 2 changed files with 28 additions and 12 deletions.
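
The pattern being made 32-bit-clean is a "compare and swap up to the maximum" loop on pointer-sized values. As a rough standalone sketch of the same idea, written against the GCC/Clang __sync builtin directly rather than the runtime's wrapper (the function name here is illustrative, not part of the runtime):

#include <stdint.h>

// Atomically keep *ptr at the maximum value observed so far, using only a
// pointer-sized compare-and-swap (32 bits on 32-bit targets).
static inline void compare_max_and_swap(uintptr_t *ptr, uintptr_t val) {
    uintptr_t old_val = *ptr;
    while (val > old_val) {
        // Returns the value that was in *ptr just before the attempt.
        uintptr_t seen = __sync_val_compare_and_swap(ptr, old_val, val);
        if (seen == old_val) {
            return;  // our value is now the stored maximum
        }
        old_val = seen;  // lost the race; re-check against the newer value
    }
}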
27 changes: 15 additions & 12 deletions src/runtime/profiler_common.cpp
@@ -180,11 +180,10 @@ WEAK void sampling_profiler_thread(void *) {

namespace {

-template<typename T>
-void sync_compare_max_and_swap(T *ptr, T val) {
+void sync_compare_max_and_swap(uintptr_t *ptr, uintptr_t val) {
using namespace Halide::Runtime::Internal::Synchronization;

-T old_val = *ptr;
+uintptr_t old_val = *ptr;
while (val > old_val) {
if (atomic_cas_strong_sequentially_consistent(ptr, &old_val, &val)) {
return;
@@ -350,7 +349,11 @@ WEAK void halide_profiler_stack_peak_update(void *user_context,
// Update per-func memory stats
for (int i = 0; i < instance->pipeline_stats->num_funcs; ++i) {
if (f_values[i] != 0) {
-sync_compare_max_and_swap(&(instance->funcs[i]).stack_peak, f_values[i]);
+// On 32-bit platforms we don't want to use 64-bit
+// atomics. Fortunately on these platforms memory usage fits into
+// 32-bit integers.
+sync_compare_max_and_swap((uintptr_t *)(&(instance->funcs[i]).stack_peak),
+                          (uintptr_t)(f_values[i]));
}
}
}
@@ -382,15 +385,15 @@ WEAK void halide_profiler_memory_allocate(void *user_context,

// Update per-instance memory stats
atomic_add_fetch_sequentially_consistent(&instance->num_allocs, 1);
-atomic_add_fetch_sequentially_consistent(&instance->memory_total, incr);
-uint64_t p_mem_current = atomic_add_fetch_sequentially_consistent(&instance->memory_current, incr);
-sync_compare_max_and_swap(&instance->memory_peak, p_mem_current);
+atomic_add_fetch_sequentially_consistent((uintptr_t *)(&instance->memory_total), (uintptr_t)incr);
+uint64_t p_mem_current = atomic_add_fetch_sequentially_consistent((uintptr_t *)(&instance->memory_current), (uintptr_t)incr);
+sync_compare_max_and_swap((uintptr_t *)(&instance->memory_peak), (uintptr_t)p_mem_current);

// Update per-func memory stats
atomic_add_fetch_sequentially_consistent(&func->num_allocs, 1);
-atomic_add_fetch_sequentially_consistent(&func->memory_total, incr);
-uint64_t f_mem_current = atomic_add_fetch_sequentially_consistent(&func->memory_current, incr);
-sync_compare_max_and_swap(&func->memory_peak, f_mem_current);
+atomic_add_fetch_sequentially_consistent((uintptr_t *)(&func->memory_total), (uintptr_t)incr);
+uint64_t f_mem_current = atomic_add_fetch_sequentially_consistent((uintptr_t *)(&func->memory_current), (uintptr_t)incr);
+sync_compare_max_and_swap((uintptr_t *)(&func->memory_peak), (uintptr_t)f_mem_current);
}

WEAK void halide_profiler_memory_free(void *user_context,
@@ -418,10 +421,10 @@ WEAK void halide_profiler_memory_free(void *user_context,
// unless user specifically calls halide_profiler_reset().

// Update per-pipeline memory stats
-atomic_sub_fetch_sequentially_consistent(&instance->memory_current, decr);
+atomic_sub_fetch_sequentially_consistent((uintptr_t *)(&instance->memory_current), (uintptr_t)decr);

// Update per-func memory stats
-atomic_sub_fetch_sequentially_consistent(&func->memory_current, decr);
+atomic_sub_fetch_sequentially_consistent((uintptr_t *)(&func->memory_current), (uintptr_t)decr);
}

WEAK void halide_profiler_report_unlocked(void *user_context, halide_profiler_state *s) {
13 changes: 13 additions & 0 deletions src/runtime/runtime_atomics.h
@@ -35,36 +35,43 @@ ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val)

template<typename T>
ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
+static_assert(sizeof(T) == 4);
return __sync_fetch_and_add(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE T atomic_fetch_add_sequentially_consistent(T *addr, TV val) {
+static_assert(sizeof(T) == 4);
return __sync_fetch_and_add(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE T atomic_fetch_sub_sequentially_consistent(T *addr, TV val) {
+static_assert(sizeof(T) == 4);
return __sync_fetch_and_sub(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE T atomic_fetch_or_sequentially_consistent(T *addr, TV val) {
+static_assert(sizeof(T) == 4);
return __sync_fetch_and_or(addr, val);
}

template<typename T>
ALWAYS_INLINE T atomic_add_fetch_sequentially_consistent(T *addr, T val) {
+static_assert(sizeof(T) == 4);
return __sync_add_and_fetch(addr, val);
}

template<typename T>
ALWAYS_INLINE T atomic_sub_fetch_sequentially_consistent(T *addr, T val) {
+static_assert(sizeof(T) == 4);
return __sync_sub_and_fetch(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE bool cas_strong_sequentially_consistent_helper(T *addr, TV *expected, TV *desired) {
+static_assert(sizeof(T) == 4);
TV oldval = *expected;
TV gotval = __sync_val_compare_and_swap(addr, oldval, *desired);
*expected = gotval;
@@ -99,11 +106,13 @@ ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *e

template<typename T>
ALWAYS_INLINE T atomic_fetch_and_release(T *addr, T val) {
+static_assert(sizeof(T) == 4);
return __sync_fetch_and_and(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE T atomic_fetch_and_sequentially_consistent(T *addr, TV val) {
+static_assert(sizeof(T) == 4);
return __sync_fetch_and_and(addr, val);
}

@@ -121,6 +130,7 @@ ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
template<typename T>
ALWAYS_INLINE T atomic_exchange_acquire(T *addr, T val) {
// Despite the name, this is really just an exchange operation with acquire ordering.
+static_assert(sizeof(T) == 4);
return __sync_lock_test_and_set(addr, val);
}

@@ -130,17 +140,20 @@ ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val)

template<typename T>
ALWAYS_INLINE void atomic_store_relaxed(T *addr, T *val) {
+static_assert(sizeof(T) == 4);
*addr = *val;
}

template<typename T>
ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
+static_assert(sizeof(T) == 4);
*addr = *val;
__sync_synchronize();
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE void atomic_store_sequentially_consistent(T *addr, TV *val) {
+static_assert(sizeof(T) == 4);
*addr = *val;
__sync_synchronize();
}
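
With static_assert(sizeof(T) == 4) added to each of these __sync-based helpers, instantiating them with anything other than a 32-bit-sized type now fails at compile time instead of quietly generating a 64-bit atomic. A hypothetical example of what is rejected versus the pointer-sized form the profiler code now uses (names here are illustrative only):

uint64_t total64 = 0;
// atomic_add_fetch_sequentially_consistent(&total64, (uint64_t)16);  // now a compile error: sizeof(T) != 4

uintptr_t total = 0;
atomic_add_fetch_sequentially_consistent(&total, (uintptr_t)16);      // fine: pointer-sized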
