Skip to content

Commit

Permalink
runtime: benchmark mutex handoffs
Browse files Browse the repository at this point in the history
The speed of handing off a mutex to a waiting thread is sensitive to the
configuration of the spinning section of lock2. Measure that latency
directly, to complement our existing benchmarks of mutex throughput.

For #68578

Change-Id: I7637684bcff62eb05cc008491f095f653d13af4b
Reviewed-on: https://go-review.googlesource.com/c/go/+/602176
Reviewed-by: Dmitri Shuralyov <[email protected]>
Reviewed-by: Michael Knyszek <[email protected]>
Auto-Submit: Rhys Hiltner <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
  • Loading branch information
rhysh authored and gopherbot committed Aug 2, 2024
1 parent aac7106 commit e8776e1
Showing 1 changed file with 110 additions and 0 deletions.
110 changes: 110 additions & 0 deletions src/runtime/runtime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ package runtime_test
import (
"flag"
"fmt"
"internal/cpu"
"internal/runtime/atomic"
"io"
. "runtime"
"runtime/debug"
Expand Down Expand Up @@ -561,3 +563,111 @@ func BenchmarkOSYield(b *testing.B) {
OSYield()
}
}

// BenchmarkMutexHandoff measures the latency of handing off a runtime.mutex
// from the thread that holds it to a thread that is waiting for it. This
// complements throughput-oriented mutex benchmarks: handoff latency is
// sensitive to the tuning of the spinning section of runtime.lock2.
func BenchmarkMutexHandoff(b *testing.B) {
	// testcase builds a sub-benchmark parameterized by a "delay" function,
	// which simulates the mutex-holder's critical section and controls
	// whether the waiting thread is spinning or sleeping at handoff time.
	testcase := func(delay func(l *Mutex)) func(b *testing.B) {
		return func(b *testing.B) {
			// A handoff needs two threads: one holding the lock, one waiting.
			if workers := 2; GOMAXPROCS(0) < workers {
				b.Skipf("requires GOMAXPROCS >= %d", workers)
			}

			// Measure latency of mutex handoff between threads.
			//
			// Hand off a runtime.mutex between two threads, one running a
			// "coordinator" goroutine and the other running a "worker"
			// goroutine. We don't override the runtime's typical
			// goroutine/thread mapping behavior.
			//
			// Measure the latency, starting when the coordinator enters a call
			// to runtime.unlock and ending when the worker's call to
			// runtime.lock returns. The benchmark can specify a "delay"
			// function to simulate the length of the mutex-holder's critical
			// section, including to arrange for the worker's thread to be in
			// either the "spinning" or "sleeping" portions of the runtime.lock2
			// implementation. Measurement starts after any such "delay".
			//
			// The two threads' goroutines communicate their current position to
			// each other in a non-blocking way via the "turn" state.

			var state struct {
				_    [cpu.CacheLinePadSize]byte // pad so lock and turn sit on separate cache lines
				lock Mutex
				_    [cpu.CacheLinePadSize]byte
				turn atomic.Int64
				_    [cpu.CacheLinePadSize]byte
			}

			// delta accumulates -sum(handoff start times) + sum(handoff end
			// times) across both goroutines, so delta/b.N is the mean latency.
			var delta atomic.Int64
			var wg sync.WaitGroup

			// coordinator:
			// - acquire the mutex
			// - set the turn to 2 mod 4, instructing the worker to begin its Lock call
			// - wait until the mutex is contended
			// - wait a bit more so the worker can commit to its sleep
			// - release the mutex and wait for it to be our turn (0 mod 4) again
			wg.Add(1)
			go func() {
				defer wg.Done()
				var t int64
				for range b.N {
					Lock(&state.lock)
					state.turn.Add(2) // turn becomes 2 mod 4: worker may start its Lock call
					delay(&state.lock)
					t -= Nanotime() // start the timer
					Unlock(&state.lock)
					// Spin until the worker hands the turn back (0 mod 4).
					for state.turn.Load()&0x2 != 0 {
					}
				}
				state.turn.Add(1) // make the turn odd: signals the worker to exit
				delta.Add(t)
			}()

			// worker:
			// - wait until it's our turn (2 mod 4)
			// - acquire and release the mutex
			// - switch the turn counter back to the coordinator (0 mod 4)
			wg.Add(1)
			go func() {
				defer wg.Done()
				var t int64
				for {
					switch state.turn.Load() & 0x3 {
					case 0:
						// Coordinator's turn; keep spinning.
					case 1, 3:
						// Odd turn: coordinator has finished all b.N iterations.
						delta.Add(t)
						return
					case 2:
						Lock(&state.lock)
						t += Nanotime() // stop the timer
						Unlock(&state.lock)
						state.turn.Add(2) // hand the turn back to the coordinator (0 mod 4)
					}
				}
			}()

			wg.Wait()
			// Report the mean handoff latency per iteration.
			b.ReportMetric(float64(delta.Load())/float64(b.N), "ns/op")
		}
	}

	// Solo: uncontended lock/unlock on a single goroutine, as a baseline.
	b.Run("Solo", func(b *testing.B) {
		var lock Mutex
		for range b.N {
			Lock(&lock)
			Unlock(&lock)
		}
	})

	// FastPingPong: hand off with an empty critical section, so the worker is
	// likely still in lock2's spinning phase when the mutex is released.
	b.Run("FastPingPong", testcase(func(l *Mutex) {}))
	// SlowPingPong: hold the mutex long enough that the worker's thread
	// commits to sleeping, so the handoff must go through a wakeup.
	b.Run("SlowPingPong", testcase(func(l *Mutex) {
		// Wait for the worker to stop spinning and prepare to sleep
		for !MutexContended(l) {
		}
		// Wait a bit longer so the OS can finish committing the worker to its
		// sleep. Balance consistency against getting enough iterations.
		const extraNs = 10e3
		for t0 := Nanotime(); Nanotime()-t0 < extraNs; {
		}
	}))
}

0 comments on commit e8776e1

Please sign in to comment.