Skip to content

Commit

Permalink
runtime: fix net poll races
Browse files Browse the repository at this point in the history
The netpoll code was written long ago, when the
only multiprocessors that Go ran on were x86.
It assumed that an atomic store would trigger a
full memory barrier and then used that barrier
to order otherwise racy access to a handful of fields,
including pollDesc.closing.

On ARM64, this code has finally failed, because
the atomic store is on a value completely unrelated
to any of the racily-accessed fields, and the ARMv8
hardware, unlike x86, is clever enough not to do a
full memory barrier for a simple atomic store.
We are seeing a constant background rate of trybot
failures where the net/http tests deadlock - a netpollblock
has clearly happened after the pollDesc has begun to close.

The code that does the racy reads is netpollcheckerr,
which needs to be able to run without acquiring a lock.
This CL fixes the race, without introducing unnecessary
inefficiency or deadlock, by arranging for every updater
of the relevant fields to publish a summary as a single
atomic uint32, and then having netpollcheckerr use a
single atomic load to fetch the relevant bits and then
proceed as before.

Fixes #45211 (until proven otherwise!).

Change-Id: Ib6788c8da4d00b7bda84d55ca3fdffb5a64c1a0a
Reviewed-on: https://go-review.googlesource.com/c/go/+/378234
Trust: Russ Cox <[email protected]>
Run-TryBot: Russ Cox <[email protected]>
Trust: Bryan Mills <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
  • Loading branch information
rsc committed Jan 14, 2022
1 parent b41185c commit 17b2fb1
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 57 deletions.
157 changes: 113 additions & 44 deletions src/runtime/netpoll.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,31 +71,99 @@ const pollBlockSize = 4 * 1024
//go:notinheap
type pollDesc struct {
link *pollDesc // in pollcache, protected by pollcache.lock
fd uintptr // constant for pollDesc usage lifetime

// atomicInfo holds bits from closing, rd, and wd,
// which are only ever written while holding the lock,
// summarized for use by netpollcheckerr,
// which cannot acquire the lock.
// After writing these fields under lock in a way that
// might change the summary, code must call publishInfo
// before releasing the lock.
// Code that changes fields and then calls netpollunblock
// (while still holding the lock) must call publishInfo
// before calling netpollunblock, because publishInfo is what
// stops netpollblock from blocking anew
// (by changing the result of netpollcheckerr).
// atomicInfo also holds the eventErr bit,
// recording whether a poll event on the fd got an error;
// atomicInfo is the only source of truth for that bit.
atomicInfo atomic.Uint32 // atomic pollInfo

// rg, wg are accessed atomically and hold g pointers.
// (Using atomic.Uintptr here is similar to using guintptr elsewhere.)
rg atomic.Uintptr // pdReady, pdWait, G waiting for read or nil
wg atomic.Uintptr // pdReady, pdWait, G waiting for write or nil

// The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations.
// This fully covers seq, rt and wt variables. fd is constant throughout the PollDesc lifetime.
// pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO readiness notification)
// proceed w/o taking the lock. So closing, everr, rg, rd, wg and wd are manipulated
// in a lock-free way by all operations.
// TODO(golang.org/issue/49008): audit these lock-free fields for continued correctness.
// NOTE(dvyukov): the following code uses uintptr to store *g (rg/wg),
// that will blow up when GC starts moving objects.
lock mutex // protects the following fields
fd uintptr
closing bool
everr bool // marks event scanning error happened
user uint32 // user settable cookie
rseq uintptr // protects from stale read timers
rg uintptr // pdReady, pdWait, G waiting for read or nil. Accessed atomically.
rt timer // read deadline timer (set if rt.f != nil)
rd int64 // read deadline
rd int64 // read deadline (a nanotime in the future, -1 when expired)
wseq uintptr // protects from stale write timers
wg uintptr // pdReady, pdWait, G waiting for write or nil. Accessed atomically.
wt timer // write deadline timer
wd int64 // write deadline
wd int64 // write deadline (a nanotime in the future, -1 when expired)
self *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
}

// pollInfo is the bits needed by netpollcheckerr, stored atomically,
// mostly duplicating state that is manipulated under lock in pollDesc.
// The one exception is the pollEventErr bit, which is maintained only
// in the pollInfo.
type pollInfo uint32

const (
pollClosing = 1 << iota
pollEventErr
pollExpiredReadDeadline
pollExpiredWriteDeadline
)

func (i pollInfo) closing() bool { return i&pollClosing != 0 }
func (i pollInfo) eventErr() bool { return i&pollEventErr != 0 }
func (i pollInfo) expiredReadDeadline() bool { return i&pollExpiredReadDeadline != 0 }
func (i pollInfo) expiredWriteDeadline() bool { return i&pollExpiredWriteDeadline != 0 }

// info returns the pollInfo corresponding to pd.
func (pd *pollDesc) info() pollInfo {
return pollInfo(pd.atomicInfo.Load())
}

// publishInfo updates pd.atomicInfo (returned by pd.info)
// using the other values in pd.
// It must be called while holding pd.lock,
// and it must be called after changing anything
// that might affect the info bits.
// In practice this means after changing closing
// or changing rd or wd from < 0 to >= 0.
func (pd *pollDesc) publishInfo() {
var info uint32
if pd.closing {
info |= pollClosing
}
if pd.rd < 0 {
info |= pollExpiredReadDeadline
}
if pd.wd < 0 {
info |= pollExpiredWriteDeadline
}

// Set all of x except the pollEventErr bit.
x := pd.atomicInfo.Load()
for !pd.atomicInfo.CompareAndSwap(x, (x&pollEventErr)|info) {
x = pd.atomicInfo.Load()
}
}

// setEventErr sets the result of pd.info().eventErr() to b.
func (pd *pollDesc) setEventErr(b bool) {
x := pd.atomicInfo.Load()
for (x&pollEventErr != 0) != b && !pd.atomicInfo.CompareAndSwap(x, x^pollEventErr) {
x = pd.atomicInfo.Load()
}
}

type pollCache struct {
lock mutex
first *pollDesc
Expand Down Expand Up @@ -147,24 +215,25 @@ func poll_runtime_isPollServerDescriptor(fd uintptr) bool {
func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) {
pd := pollcache.alloc()
lock(&pd.lock)
wg := atomic.Loaduintptr(&pd.wg)
wg := pd.wg.Load()
if wg != 0 && wg != pdReady {
throw("runtime: blocked write on free polldesc")
}
rg := atomic.Loaduintptr(&pd.rg)
rg := pd.rg.Load()
if rg != 0 && rg != pdReady {
throw("runtime: blocked read on free polldesc")
}
pd.fd = fd
pd.closing = false
pd.everr = false
pd.setEventErr(false)
pd.rseq++
atomic.Storeuintptr(&pd.rg, 0)
pd.rg.Store(0)
pd.rd = 0
pd.wseq++
atomic.Storeuintptr(&pd.wg, 0)
pd.wg.Store(0)
pd.wd = 0
pd.self = pd
pd.publishInfo()
unlock(&pd.lock)

errno := netpollopen(fd, pd)
Expand All @@ -180,11 +249,11 @@ func poll_runtime_pollClose(pd *pollDesc) {
if !pd.closing {
throw("runtime: close polldesc w/o unblock")
}
wg := atomic.Loaduintptr(&pd.wg)
wg := pd.wg.Load()
if wg != 0 && wg != pdReady {
throw("runtime: blocked write on closing polldesc")
}
rg := atomic.Loaduintptr(&pd.rg)
rg := pd.rg.Load()
if rg != 0 && rg != pdReady {
throw("runtime: blocked read on closing polldesc")
}
Expand All @@ -209,9 +278,9 @@ func poll_runtime_pollReset(pd *pollDesc, mode int) int {
return errcode
}
if mode == 'r' {
atomic.Storeuintptr(&pd.rg, 0)
pd.rg.Store(0)
} else if mode == 'w' {
atomic.Storeuintptr(&pd.wg, 0)
pd.wg.Store(0)
}
return pollNoError
}
Expand Down Expand Up @@ -273,6 +342,7 @@ func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) {
if mode == 'w' || mode == 'r'+'w' {
pd.wd = d
}
pd.publishInfo()
combo := pd.rd > 0 && pd.rd == pd.wd
rtf := netpollReadDeadline
if combo {
Expand Down Expand Up @@ -314,15 +384,13 @@ func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) {
}
}
// If we set the new deadline in the past, unblock currently pending IO if any.
// Note that pd.publishInfo has already been called, above, immediately after modifying rd and wd.
var rg, wg *g
if pd.rd < 0 || pd.wd < 0 {
atomic.StorepNoWB(noescape(unsafe.Pointer(&wg)), nil) // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock
if pd.rd < 0 {
rg = netpollunblock(pd, 'r', false)
}
if pd.wd < 0 {
wg = netpollunblock(pd, 'w', false)
}
if pd.rd < 0 {
rg = netpollunblock(pd, 'r', false)
}
if pd.wd < 0 {
wg = netpollunblock(pd, 'w', false)
}
unlock(&pd.lock)
if rg != nil {
Expand All @@ -343,7 +411,7 @@ func poll_runtime_pollUnblock(pd *pollDesc) {
pd.rseq++
pd.wseq++
var rg, wg *g
atomic.StorepNoWB(noescape(unsafe.Pointer(&rg)), nil) // full memory barrier between store to closing and read of rg/wg in netpollunblock
pd.publishInfo()
rg = netpollunblock(pd, 'r', false)
wg = netpollunblock(pd, 'w', false)
if pd.rt.f != nil {
Expand Down Expand Up @@ -388,16 +456,17 @@ func netpollready(toRun *gList, pd *pollDesc, mode int32) {
}

func netpollcheckerr(pd *pollDesc, mode int32) int {
if pd.closing {
info := pd.info()
if info.closing() {
return pollErrClosing
}
if (mode == 'r' && pd.rd < 0) || (mode == 'w' && pd.wd < 0) {
if (mode == 'r' && info.expiredReadDeadline()) || (mode == 'w' && info.expiredWriteDeadline()) {
return pollErrTimeout
}
// Report an event scanning error only on a read event.
// An error on a write event will be captured in a subsequent
// write call that is able to report a more specific error.
if mode == 'r' && pd.everr {
if mode == 'r' && info.eventErr() {
return pollErrNotPollable
}
return pollNoError
Expand Down Expand Up @@ -432,28 +501,28 @@ func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
// set the gpp semaphore to pdWait
for {
// Consume notification if already ready.
if atomic.Casuintptr(gpp, pdReady, 0) {
if gpp.CompareAndSwap(pdReady, 0) {
return true
}
if atomic.Casuintptr(gpp, 0, pdWait) {
if gpp.CompareAndSwap(0, pdWait) {
break
}

// Double check that this isn't corrupt; otherwise we'd loop
// forever.
if v := atomic.Loaduintptr(gpp); v != pdReady && v != 0 {
if v := gpp.Load(); v != pdReady && v != 0 {
throw("runtime: double wait")
}
}

// need to recheck error states after setting gpp to pdWait
// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
// do the opposite: store to closing/rd/wd, membarrier, load of rg/wg
// do the opposite: store to closing/rd/wd, publishInfo, load of rg/wg
if waitio || netpollcheckerr(pd, mode) == pollNoError {
gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5)
}
// be careful to not lose concurrent pdReady notification
old := atomic.Xchguintptr(gpp, 0)
old := gpp.Swap(0)
if old > pdWait {
throw("runtime: corrupted polldesc")
}
Expand All @@ -467,7 +536,7 @@ func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
}

for {
old := atomic.Loaduintptr(gpp)
old := gpp.Load()
if old == pdReady {
return nil
}
Expand All @@ -480,7 +549,7 @@ func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
if ioready {
new = pdReady
}
if atomic.Casuintptr(gpp, old, new) {
if gpp.CompareAndSwap(old, new) {
if old == pdWait {
old = 0
}
Expand Down Expand Up @@ -508,7 +577,7 @@ func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
throw("runtime: inconsistent read deadline")
}
pd.rd = -1
atomic.StorepNoWB(unsafe.Pointer(&pd.rt.f), nil) // full memory barrier between store to rd and load of rg in netpollunblock
pd.publishInfo()
rg = netpollunblock(pd, 'r', false)
}
var wg *g
Expand All @@ -517,7 +586,7 @@ func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
throw("runtime: inconsistent write deadline")
}
pd.wd = -1
atomic.StorepNoWB(unsafe.Pointer(&pd.wt.f), nil) // full memory barrier between store to wd and load of wg in netpollunblock
pd.publishInfo()
wg = netpollunblock(pd, 'w', false)
}
unlock(&pd.lock)
Expand Down
5 changes: 1 addition & 4 deletions src/runtime/netpoll_aix.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,7 @@ retry:
pfd.events &= ^_POLLOUT
}
if mode != 0 {
pds[i].everr = false
if pfd.revents == _POLLERR {
pds[i].everr = true
}
pds[i].setEventErr(pfd.revents == _POLLERR)
netpollready(&toRun, pds[i], mode)
n--
}
Expand Down
5 changes: 1 addition & 4 deletions src/runtime/netpoll_epoll.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,7 @@ retry:
}
if mode != 0 {
pd := *(**pollDesc)(unsafe.Pointer(&ev.data))
pd.everr = false
if ev.events == _EPOLLERR {
pd.everr = true
}
pd.setEventErr(ev.events == _EPOLLERR)
netpollready(&toRun, pd, mode)
}
}
Expand Down
5 changes: 1 addition & 4 deletions src/runtime/netpoll_kqueue.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,7 @@ retry:
}
if mode != 0 {
pd := (*pollDesc)(unsafe.Pointer(ev.udata))
pd.everr = false
if ev.flags == _EV_ERROR {
pd.everr = true
}
pd.setEventErr(ev.flags == _EV_ERROR)
netpollready(&toRun, pd, mode)
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/netpoll_solaris.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ func netpollclose(fd uintptr) int32 {
// this call, port_getn will return one and only one event for that
// particular descriptor, so this function needs to be called again.
func netpollupdate(pd *pollDesc, set, clear uint32) {
if pd.closing {
if pd.info().closing() {
return
}

Expand Down
2 changes: 2 additions & 0 deletions src/runtime/runtime2.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ func efaceOf(ep *any) *eface {
// so I can't see them ever moving. If we did want to start moving data
// in the GC, we'd need to allocate the goroutine structs from an
// alternate arena. Using guintptr doesn't make that problem any worse.
// Note that pollDesc.rg, pollDesc.wg also store g in uintptr form,
// so they would need to be updated too if g's start moving.
type guintptr uintptr

//go:nosplit
Expand Down

0 comments on commit 17b2fb1

Please sign in to comment.