WIP: add experimental implementation of atomic_thread_fence
Signed-off-by: akatrano <[email protected]>
alexey-katranov committed Jun 25, 2021
1 parent 5ccb742 commit bc23916
Showing 7 changed files with 157 additions and 0 deletions.
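
For context: the pattern this commit targets is C++11 fence-based synchronization, which TSan's AtomicFence has ignored so far (note the FIXME(dvyukov) retained in tsan_interface_atomic.cpp below). A minimal litmus test (hypothetical, not part of the commit) in which the fences do synchronize per C++11 [atomics.fences], but a TSan build without fence modeling reports a false race on `data`:

// Hypothetical litmus test (not part of this commit). The fences pair the
// relaxed operations into a synchronizes-with edge, so the accesses to
// `data` do not race; without fence support TSan reports one anyway.
#include <atomic>
#include <thread>

int data;
std::atomic<int> flag{0};

void producer() {
  data = 42;                                            // plain write
  std::atomic_thread_fence(std::memory_order_release);
  flag.store(1, std::memory_order_relaxed);             // relaxed publish
}

void consumer() {
  while (flag.load(std::memory_order_relaxed) == 0) {}  // relaxed spin
  std::atomic_thread_fence(std::memory_order_acquire);
  int r = data;                                         // ordered after the write
  (void)r;
}

int main() {
  std::thread t1(producer), t2(consumer);
  t1.join();
  t2.join();
}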
9 changes: 9 additions & 0 deletions compiler-rt/lib/tsan/rtl/tsan_clock.h
@@ -59,6 +59,11 @@ class SyncClock {
Iter begin();
Iter end();

#if __TSAN_EXPERIMENTAL_FENCES
// TODO: rework the implementation via ThreadClock
u16 size() { return size_; }
#endif

private:
friend class ThreadClock;
friend class Iter;
@@ -220,6 +225,10 @@ class ThreadClock {

// Number of active elements in the clk_ table (the rest is zeros).
uptr nclk_;
#if __TSAN_EXPERIMENTAL_FENCES
// TODO: rework
public:
#endif
u64 clk_[kMaxTidInClock]; // Fixed size vector clock.

bool IsAlreadyAcquired(const SyncClock *src) const;
5 changes: 5 additions & 0 deletions compiler-rt/lib/tsan/rtl/tsan_defs.h
@@ -35,6 +35,11 @@
# endif
#endif

#ifndef __TSAN_EXPERIMENTAL_FENCES
#define __TSAN_EXPERIMENTAL_FENCES 1
#endif


namespace __tsan {

const int kClkBits = 42;
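Since the definition above is guarded with #ifndef, the experimental paths can presumably be compiled out by defining __TSAN_EXPERIMENTAL_FENCES=0 on the compiler command line, with no other source changes; as committed, the feature defaults to on.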
60 changes: 60 additions & 0 deletions compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp
@@ -218,12 +218,27 @@ static a128 NoTsanAtomicLoad(const volatile a128 *a, morder mo) {
}
#endif

#if __TSAN_EXPERIMENTAL_FENCES
namespace __tsan {
void AtomicFenceImplLoad(ThreadState *thr, uptr pc, SyncClock* c);
void AtomicFenceImplRMW(ThreadState *thr, uptr pc, SyncClock* c);
}
#endif

template<typename T>
static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a, morder mo) {
CHECK(IsLoadOrder(mo));
// This fast-path is critical for performance.
// Assume the access is atomic.
if (!IsAcquireOrder(mo)) {
#if __TSAN_EXPERIMENTAL_FENCES
// TODO: skip the fence logic when no explicit fences are used
SyncVar *s = ctx->metamap.GetIfExistsAndLock((uptr)a, false);
if (s) {
AtomicFenceImplLoad(thr, pc, &s->fence_clock);
s->mtx.ReadUnlock();
}
#endif
MemoryReadAtomic(thr, pc, (uptr)a, SizeLog<T>());
return NoTsanAtomicLoad(a, mo);
}
@@ -281,6 +296,7 @@ template<typename T, T (*F)(volatile T *v, T op)>
static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) {
MemoryWriteAtomic(thr, pc, (uptr)a, SizeLog<T>());
SyncVar *s = 0;

if (mo != mo_relaxed) {
s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, true);
thr->fast_state.IncrementEpoch();
@@ -292,7 +308,17 @@ static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) {
ReleaseImpl(thr, pc, &s->clock);
else if (IsAcquireOrder(mo))
AcquireImpl(thr, pc, &s->clock);
#if __TSAN_EXPERIMENTAL_FENCES
AtomicFenceImplRMW(thr, pc, &s->fence_clock);
#endif
}
#if __TSAN_EXPERIMENTAL_FENCES
else {
s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, true);
AtomicFenceImplRMW(thr, pc, &s->fence_clock);
}
#endif

v = F(a, v);
if (s)
s->mtx.Unlock();
@@ -451,12 +477,46 @@ static T AtomicCAS(ThreadState *thr, uptr pc,
}

#if !SANITIZER_GO

#if __TSAN_EXPERIMENTAL_FENCES

namespace __tsan {
void AtomicFenceAcquireImpl(ThreadState *thr, uptr pc);
void AtomicFenceReleaseImpl(ThreadState *thr, uptr pc);
}

static void AtomicFenceAcquire(ThreadState *thr, uptr pc) {
AtomicFenceAcquireImpl(thr, pc);
}

static void AtomicFenceRelease(ThreadState *thr, uptr pc) {
thr->fast_state.IncrementEpoch();
// Can't increment epoch w/o writing to the trace as well.
// TODO: what are we tracing here?
TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
AtomicFenceReleaseImpl(thr, pc);
}

static void AtomicFenceImpl(ThreadState *thr, uptr pc, morder mo) {
if (IsAcquireOrder(mo)) {
AtomicFenceAcquire(thr, pc);
}
if (IsReleaseOrder(mo)) {
AtomicFenceRelease(thr, pc);
}
}

#endif

static void NoTsanAtomicFence(morder mo) {
__sync_synchronize();
}

static void AtomicFence(ThreadState *thr, uptr pc, morder mo) {
// FIXME(dvyukov): not implemented.
#if __TSAN_EXPERIMENTAL_FENCES
AtomicFenceImpl(thr, pc, mo);
#endif
__sync_synchronize();
}
#endif
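
Putting the hooks in this file together: relaxed loads and RMWs now consult the per-address fence_clock, and explicit fences move clocks between the per-thread fence clocks and the thread's vector clock (the *Impl functions are defined in tsan_rtl_mutex.cpp below). A sketch of which hook fires at each step of the litmus test shown at the top; the relaxed-store hook is an assumption here, presumably added in an elided hunk of this file symmetric to the RMW path:

// Data-flow sketch, inferred from this patch (not authoritative):
//
// producer:
//   atomic_thread_fence(release) -> AtomicFenceRelease:
//       thr->clock --release--> thr->fence_clock_release
//   flag.store(1, relaxed)       -> store-side hook (assumed, elided hunk):
//       thr->fence_clock_release --join--> SyncVar(flag)->fence_clock
//
// consumer:
//   flag.load(relaxed)           -> AtomicFenceImplLoad:
//       SyncVar(flag)->fence_clock --join--> thr->fence_clock_acquire
//   atomic_thread_fence(acquire) -> AtomicFenceAcquire:
//       thr->fence_clock_acquire --acquire--> thr->clock
//
// Net effect: the consumer's vector clock includes the producer's epoch as
// of the release fence, so the later read of `data` happens after the write.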
6 changes: 6 additions & 0 deletions compiler-rt/lib/tsan/rtl/tsan_rtl.h
@@ -438,6 +438,12 @@ struct ThreadState {
ThreadClock last_sleep_clock;
#endif

#if __TSAN_EXPERIMENTAL_FENCES
// TODO: consider using ThreadClock
SyncClock fence_clock_acquire;
SyncClock fence_clock_release;
#endif

// Set in regions of runtime that must be signal-safe and fork-safe.
// If set, malloc must not be called.
int nomalloc;
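
The two per-thread clocks capture the two directions of fence synchronization: fence_clock_acquire accumulates the implicit clocks observed by relaxed loads until an acquire fence merges them into the thread's own clock, while fence_clock_release snapshots the thread's clock at a release fence so that later relaxed stores and RMWs can publish it.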
67 changes: 67 additions & 0 deletions compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
@@ -527,13 +527,80 @@ void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c) {
void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) {
if (thr->ignore_sync)
return;

thr->clock.set(thr->fast_state.epoch());
thr->fast_synch_epoch = thr->fast_state.epoch();
thr->clock.acq_rel(&thr->proc()->clock_cache, c);

#if __TSAN_EXPERIMENTAL_FENCES
// TODO: add acquire fence clock update

// Update the release fence clock so it is available to subsequent relaxed loads.
// TODO: skip this when the program uses no explicit fences.
thr->fence_clock_release.Resize(&thr->proc()->clock_cache, thr->clock.size());
u64 *src_pos = &thr->clock.clk_[0];
for (ClockElem &dst_elem : thr->fence_clock_release) {
u64 epoch = *src_pos;
if (dst_elem.epoch < epoch) {
dst_elem.epoch = epoch;
}
src_pos++;
}
#endif

StatInc(thr, StatSyncAcquire);
StatInc(thr, StatSyncRelease);
}

#if __TSAN_EXPERIMENTAL_FENCES
void AtomicFenceImplLoad(ThreadState *thr, uptr pc, SyncClock* src) {
// The relaxed load brought in the implicit fence clock, so merge it into the
// thread's acquire fence clock.
thr->fence_clock_acquire.Resize(&thr->proc()->clock_cache, src->size());

auto dst = thr->fence_clock_acquire.begin();
for (ClockElem &src_elem : *src) {
u64 epoch = src_elem.epoch;
if ((*dst).epoch < epoch) {
(*dst).epoch = epoch;
}
++dst;
}
}

void AtomicFenceImplStore(ThreadState *thr, uptr pc, SyncClock* dst) {
// Propagate the thread's release fence clock into the sync object's fence clock.
dst->Resize(&thr->proc()->clock_cache, thr->fence_clock_release.size());

auto src = thr->fence_clock_release.begin();
for (ClockElem &dst_elem : *dst) {
u64 epoch = (*src).epoch;
if (dst_elem.epoch < epoch) {
dst_elem.epoch = epoch;
}
++src;
}
}

void AtomicFenceImplRMW(ThreadState *thr, uptr pc, SyncClock* src_dst) {
AtomicFenceImplLoad(thr, pc, src_dst);
AtomicFenceImplStore(thr, pc, src_dst);
}

void AtomicFenceAcquireImpl(ThreadState *thr, uptr pc) {
if (thr->ignore_sync)
return;
thr->clock.set(thr->fast_state.epoch());
thr->clock.acquire(&thr->proc()->clock_cache, &thr->fence_clock_acquire);
}
void AtomicFenceReleaseImpl(ThreadState *thr, uptr pc) {
if (thr->ignore_sync)
return;
thr->clock.set(thr->fast_state.epoch());
thr->fast_synch_epoch = thr->fast_state.epoch();
thr->clock.release(&thr->proc()->clock_cache, &thr->fence_clock_release);
}
#endif

void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
if (r == 0 || !ShouldReport(thr, ReportTypeDeadlock))
return;
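
The three merge loops above (in AcquireReleaseImpl, AtomicFenceImplLoad, and AtomicFenceImplStore) are all instances of the same element-wise join of vector clocks. A standalone sketch with simplified types (std::vector in place of SyncClock, no clock cache); note that the committed loops iterate one clock while advancing a bare iterator over the other, which relies on the two sizes matching after Resize:

#include <algorithm>
#include <cstdint>
#include <vector>

using u64 = uint64_t;

// Element-wise join of two vector clocks: after the call, dst knows
// everything src knows. The per-thread max is the happens-before union.
void JoinClocks(std::vector<u64>& dst, const std::vector<u64>& src) {
  if (dst.size() < src.size())
    dst.resize(src.size(), 0);  // stands in for SyncClock::Resize
  for (size_t i = 0; i < src.size(); ++i)
    dst[i] = std::max(dst[i], src[i]);
}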
5 changes: 5 additions & 0 deletions compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
@@ -148,6 +148,11 @@ void ThreadContext::OnFinished() {
#if !SANITIZER_GO
PlatformCleanUpThreadState(thr);
#endif

#if __TSAN_EXPERIMENTAL_FENCES
thr->fence_clock_acquire.Reset(&thr->proc()->clock_cache);
thr->fence_clock_release.Reset(&thr->proc()->clock_cache);
#endif
thr->~ThreadState();
#if TSAN_COLLECT_STATS
StatAggregate(ctx->stat, thr->stat);
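
The per-thread fence clocks allocate their storage from the per-processor clock cache, so they are Reset here on thread finish to return those blocks before the ThreadState is destroyed.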
5 changes: 5 additions & 0 deletions compiler-rt/lib/tsan/rtl/tsan_sync.h
@@ -65,6 +65,11 @@ struct SyncVar {
// with the mtx. This reduces contention for hot sync objects.
SyncClock clock;

#if __TSAN_EXPERIMENTAL_FENCES
// Carries the implicit clock of fences.
SyncClock fence_clock;
#endif

void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid);
void Reset(Processor *proc);

