diff --git a/compiler-rt/lib/tsan/rtl/tsan_clock.h b/compiler-rt/lib/tsan/rtl/tsan_clock.h
index 31376a1bc9e2f4..a95da1bcacb2d8 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_clock.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_clock.h
@@ -59,6 +59,11 @@ class SyncClock {
   Iter begin();
   Iter end();
 
+#if __TSAN_EXPERIMENTAL_FENCES
+  // TODO: rework the implementation via ThreadClock
+  u16 size() { return size_; }
+#endif
+
  private:
   friend class ThreadClock;
   friend class Iter;
@@ -220,6 +225,10 @@ class ThreadClock {
 
   // Number of active elements in the clk_ table (the rest is zeros).
   uptr nclk_;
+#if __TSAN_EXPERIMENTAL_FENCES
+  // TODO: rework
+ public:
+#endif
   u64 clk_[kMaxTidInClock];  // Fixed size vector clock.
 
   bool IsAlreadyAcquired(const SyncClock *src) const;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_defs.h b/compiler-rt/lib/tsan/rtl/tsan_defs.h
index f53787aeba970f..edaba93b86d777 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_defs.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_defs.h
@@ -35,6 +35,11 @@
 # endif
 #endif
 
+#ifndef __TSAN_EXPERIMENTAL_FENCES
+#define __TSAN_EXPERIMENTAL_FENCES 1
+#endif
+
+
 namespace __tsan {
 
 const int kClkBits = 42;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp
index 16b4e2218cf1aa..059181487ac245 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp
@@ -218,12 +218,27 @@ static a128 NoTsanAtomicLoad(const volatile a128 *a, morder mo) {
 }
 #endif
 
+#if __TSAN_EXPERIMENTAL_FENCES
+namespace __tsan {
+  void AtomicFenceImplLoad(ThreadState *thr, uptr pc, SyncClock* c);
+  void AtomicFenceImplRMW(ThreadState *thr, uptr pc, SyncClock* c);
+}
+#endif
+
 template<typename T>
 static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a, morder mo) {
   CHECK(IsLoadOrder(mo));
   // This fast-path is critical for performance.
   // Assume the access is atomic.
   if (!IsAcquireOrder(mo)) {
+#if __TSAN_EXPERIMENTAL_FENCES
+    // TODO: avoid fence logic when there are no fences
+    SyncVar *s = ctx->metamap.GetIfExistsAndLock((uptr)a, false);
+    if (s) {
+      AtomicFenceImplLoad(thr, pc, &s->fence_clock);
+      s->mtx.ReadUnlock();
+    }
+#endif
     MemoryReadAtomic(thr, pc, (uptr)a, SizeLog<sizeof(T)>());
     return NoTsanAtomicLoad(a, mo);
   }
@@ -281,6 +296,7 @@ template<typename T, T (*F)(volatile T *v, T op)>
 static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) {
   MemoryWriteAtomic(thr, pc, (uptr)a, SizeLog<sizeof(T)>());
   SyncVar *s = 0;
+
   if (mo != mo_relaxed) {
     s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, true);
     thr->fast_state.IncrementEpoch();
@@ -292,7 +308,17 @@ static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) {
       ReleaseImpl(thr, pc, &s->clock);
     else if (IsAcquireOrder(mo))
       AcquireImpl(thr, pc, &s->clock);
+#if __TSAN_EXPERIMENTAL_FENCES
+    AtomicFenceImplRMW(thr, pc, &s->fence_clock);
+#endif
   }
+#if __TSAN_EXPERIMENTAL_FENCES
+  else {
+    s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, true);
+    AtomicFenceImplRMW(thr, pc, &s->fence_clock);
+  }
+#endif
+
   v = F(a, v);
   if (s)
     s->mtx.Unlock();
@@ -451,12 +477,46 @@ static T AtomicCAS(ThreadState *thr, uptr pc,
 }
 
 #if !SANITIZER_GO
+
+#if __TSAN_EXPERIMENTAL_FENCES
+
+namespace __tsan {
+  void AtomicFenceAcquireImpl(ThreadState *thr, uptr pc);
+  void AtomicFenceReleaseImpl(ThreadState *thr, uptr pc);
+}
+
+static void AtomicFenceAcquire(ThreadState *thr, uptr pc) {
+  AtomicFenceAcquireImpl(thr, pc);
+}
+
+static void AtomicFenceRelease(ThreadState *thr, uptr pc) {
+  thr->fast_state.IncrementEpoch();
+  // Can't increment epoch w/o writing to the trace as well.
+  // TODO: what are we tracing here?
+  TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+  AtomicFenceReleaseImpl(thr, pc);
+}
+
+static void AtomicFenceImpl(ThreadState *thr, uptr pc, morder mo) {
+  if (IsAcquireOrder(mo)) {
+    AtomicFenceAcquire(thr, pc);
+  }
+  if (IsReleaseOrder(mo)) {
+    AtomicFenceRelease(thr, pc);
+  }
+}
+
+#endif
+
 static void NoTsanAtomicFence(morder mo) {
   __sync_synchronize();
 }
 
 static void AtomicFence(ThreadState *thr, uptr pc, morder mo) {
   // FIXME(dvyukov): not implemented.
+#if __TSAN_EXPERIMENTAL_FENCES
+  AtomicFenceImpl(thr, pc, mo);
+#endif
   __sync_synchronize();
 }
 #endif
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
index 3ae519d34da4f7..ca8474c6158fea 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
@@ -438,6 +438,12 @@ struct ThreadState {
   ThreadClock last_sleep_clock;
 #endif
 
+#if __TSAN_EXPERIMENTAL_FENCES
+  // TODO: consider using ThreadClock
+  SyncClock fence_clock_acquire;
+  SyncClock fence_clock_release;
+#endif
+
   // Set in regions of runtime that must be signal-safe and fork-safe.
   // If set, malloc must not be called.
   int nomalloc;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
index 0a8f3aa3ddb201..918bd5e401709e 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
@@ -527,13 +527,80 @@ void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c) {
 void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) {
   if (thr->ignore_sync)
     return;
+
   thr->clock.set(thr->fast_state.epoch());
   thr->fast_synch_epoch = thr->fast_state.epoch();
   thr->clock.acq_rel(&thr->proc()->clock_cache, c);
+
+#if __TSAN_EXPERIMENTAL_FENCES
+  // TODO: add acquire fence clock update
+
+  // Update the fence clock so it is available to relaxed loads.
+  // TODO: avoid this when there are no explicit fences
+  thr->fence_clock_release.Resize(&thr->proc()->clock_cache, thr->clock.size());
+  u64 *src_pos = &thr->clock.clk_[0];
+  for (ClockElem &dst_elem : thr->fence_clock_release) {
+    u64 epoch = *src_pos;
+    if (dst_elem.epoch < epoch) {
+      dst_elem.epoch = epoch;
+    }
+    src_pos++;
+  }
+#endif
+
   StatInc(thr, StatSyncAcquire);
   StatInc(thr, StatSyncRelease);
 }
 
+#if __TSAN_EXPERIMENTAL_FENCES
+void AtomicFenceImplLoad(ThreadState *thr, uptr pc, SyncClock* src) {
+  // The relaxed load brought the implicit clock, so update the acquire clock.
+  thr->fence_clock_acquire.Resize(&thr->proc()->clock_cache, src->size());
+
+  auto dst = thr->fence_clock_acquire.begin();
+  for (ClockElem &src_elem : *src) {
+    u64 epoch = src_elem.epoch;
+    if ((*dst).epoch < epoch) {
+      (*dst).epoch = epoch;
+    }
+    ++dst;
+  }
+}
+
+void AtomicFenceImplStore(ThreadState *thr, uptr pc, SyncClock* dst) {
+  // The release fence clock is propagated into the sync clock.
+  dst->Resize(&thr->proc()->clock_cache, thr->fence_clock_release.size());
+
+  auto dst_pos = dst->begin();
+  for (ClockElem &src_elem : thr->fence_clock_release) {
+    u64 epoch = src_elem.epoch;
+    if ((*dst_pos).epoch < epoch) {
+      (*dst_pos).epoch = epoch;
+    }
+    ++dst_pos;
+  }
+}
+
+void AtomicFenceImplRMW(ThreadState *thr, uptr pc, SyncClock* src_dst) {
+  AtomicFenceImplLoad(thr, pc, src_dst);
+  AtomicFenceImplStore(thr, pc, src_dst);
+}
+
+void AtomicFenceAcquireImpl(ThreadState *thr, uptr pc) {
+  if (thr->ignore_sync)
+    return;
+  thr->clock.set(thr->fast_state.epoch());
+  thr->clock.acquire(&thr->proc()->clock_cache, &thr->fence_clock_acquire);
+}
+void AtomicFenceReleaseImpl(ThreadState *thr, uptr pc) {
+  if (thr->ignore_sync)
+    return;
+  thr->clock.set(thr->fast_state.epoch());
+  thr->fast_synch_epoch = thr->fast_state.epoch();
+  thr->clock.release(&thr->proc()->clock_cache, &thr->fence_clock_release);
+}
+#endif
+
 void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
   if (r == 0 || !ShouldReport(thr, ReportTypeDeadlock))
     return;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
index 6d1ccd8c9c78a5..fa5d7c29ed7785 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
@@ -148,6 +148,11 @@ void ThreadContext::OnFinished() {
 #if !SANITIZER_GO
   PlatformCleanUpThreadState(thr);
 #endif
+
+#if __TSAN_EXPERIMENTAL_FENCES
+  thr->fence_clock_acquire.Reset(&thr->proc()->clock_cache);
+  thr->fence_clock_release.Reset(&thr->proc()->clock_cache);
+#endif
   thr->~ThreadState();
 #if TSAN_COLLECT_STATS
   StatAggregate(ctx->stat, thr->stat);
diff --git a/compiler-rt/lib/tsan/rtl/tsan_sync.h b/compiler-rt/lib/tsan/rtl/tsan_sync.h
index c4056f684d7e86..05249ba09a9b7b 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_sync.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_sync.h
@@ -65,6 +65,11 @@ struct SyncVar {
   // with the mtx. This reduces contention for hot sync objects.
   SyncClock clock;
 
+#if __TSAN_EXPERIMENTAL_FENCES
+  // Carries the implicit clock of fences.
+  SyncClock fence_clock;
+#endif
+
   void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid);
   void Reset(Processor *proc);
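
Reviewer note (illustrative, not part of the patch): below is a minimal sketch of the fence-based message-passing idiom these fence clocks are meant to model. Previously AtomicFence was a FIXME, so TSan reported a false race on the plain payload in code like this. With the patch, the release fence populates thr->fence_clock_release, the relaxed RMW publishes it into SyncVar::fence_clock (AtomicFenceImplStore), the relaxed load pulls it into thr->fence_clock_acquire (AtomicFenceImplLoad), and the acquire fence acquires it. The producer uses a relaxed fetch_add rather than a relaxed store because this draft only wires the relaxed-RMW and relaxed-load paths; all names in the example (data, flag, producer, consumer) are hypothetical.

#include <atomic>
#include <cassert>
#include <thread>

int data;                    // plain, non-atomic payload
std::atomic<int> flag{0};    // relaxed flag; ordering comes from the fences

void producer() {
  data = 42;                                            // plain write
  std::atomic_thread_fence(std::memory_order_release);  // release fence
  flag.fetch_add(1, std::memory_order_relaxed);         // relaxed RMW publishes
}

void consumer() {
  while (flag.load(std::memory_order_relaxed) == 0) {}  // relaxed spin
  std::atomic_thread_fence(std::memory_order_acquire);  // acquire fence
  assert(data == 42);  // fence-fence synchronization: no data race
}

int main() {
  std::thread t1(producer), t2(consumer);
  t1.join();
  t2.join();
  return 0;
}

Per C++11 fence semantics, the release fence sequenced before the relaxed RMW synchronizes with the acquire fence sequenced after the relaxed load that observes it, so the access to data is ordered; a fence-aware TSan should stay silent here, while a relaxed store in producer() would still race under this draft.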