From 17b2fb1b656a275906b5071c562439d50a27f167 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Wed, 12 Jan 2022 17:22:09 -0500 Subject: [PATCH] runtime: fix net poll races The netpoll code was written long ago, when the only multiprocessors that Go ran on were x86. It assumed that an atomic store would trigger a full memory barrier and then used that barrier to order otherwise racy access to a handful of fields, including pollDesc.closing. On ARM64, this code has finally failed, because the atomic store is on a value completely unrelated to any of the racily-accessed fields, and the ARMv8 hardware, unlike x86, is clever enough not to do a full memory barrier for a simple atomic store. We are seeing a constant background rate of trybot failures where the net/http tests deadlock - a netpollblock has clearly happened after the pollDesc has begun to close. The code that does the racy reads is netpollcheckerr, which needs to be able to run without acquiring a lock. This CL fixes the race, without introducing unnecessary inefficiency or deadlock, by arranging for every updater of the relevant fields to publish a summary as a single atomic uint32, and then having netpollcheckerr use a single atomic load to fetch the relevant bits and then proceed as before. Fixes #45211 (until proven otherwise!). Change-Id: Ib6788c8da4d00b7bda84d55ca3fdffb5a64c1a0a Reviewed-on: https://go-review.googlesource.com/c/go/+/378234 Trust: Russ Cox Run-TryBot: Russ Cox Trust: Bryan Mills Reviewed-by: Ian Lance Taylor --- src/runtime/netpoll.go | 157 ++++++++++++++++++++++++--------- src/runtime/netpoll_aix.go | 5 +- src/runtime/netpoll_epoll.go | 5 +- src/runtime/netpoll_kqueue.go | 5 +- src/runtime/netpoll_solaris.go | 2 +- src/runtime/runtime2.go | 2 + 6 files changed, 119 insertions(+), 57 deletions(-) diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go index 322a6f3637cd40..bb3dd35317ffa0 100644 --- a/src/runtime/netpoll.go +++ b/src/runtime/netpoll.go @@ -71,31 +71,99 @@ const pollBlockSize = 4 * 1024 //go:notinheap type pollDesc struct { link *pollDesc // in pollcache, protected by pollcache.lock + fd uintptr // constant for pollDesc usage lifetime + + // atomicInfo holds bits from closing, rd, and wd, + // which are only ever written while holding the lock, + // summarized for use by netpollcheckerr, + // which cannot acquire the lock. + // After writing these fields under lock in a way that + // might change the summary, code must call publishInfo + // before releasing the lock. + // Code that changes fields and then calls netpollunblock + // (while still holding the lock) must call publishInfo + // before calling netpollunblock, because publishInfo is what + // stops netpollblock from blocking anew + // (by changing the result of netpollcheckerr). + // atomicInfo also holds the eventErr bit, + // recording whether a poll event on the fd got an error; + // atomicInfo is the only source of truth for that bit. + atomicInfo atomic.Uint32 // atomic pollInfo + + // rg, wg are accessed atomically and hold g pointers. + // (Using atomic.Uintptr here is similar to using guintptr elsewhere.) + rg atomic.Uintptr // pdReady, pdWait, G waiting for read or nil + wg atomic.Uintptr // pdReady, pdWait, G waiting for write or nil - // The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations. - // This fully covers seq, rt and wt variables. fd is constant throughout the PollDesc lifetime. - // pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO readiness notification) - // proceed w/o taking the lock. So closing, everr, rg, rd, wg and wd are manipulated - // in a lock-free way by all operations. - // TODO(golang.org/issue/49008): audit these lock-free fields for continued correctness. - // NOTE(dvyukov): the following code uses uintptr to store *g (rg/wg), - // that will blow up when GC starts moving objects. lock mutex // protects the following fields - fd uintptr closing bool - everr bool // marks event scanning error happened user uint32 // user settable cookie rseq uintptr // protects from stale read timers - rg uintptr // pdReady, pdWait, G waiting for read or nil. Accessed atomically. rt timer // read deadline timer (set if rt.f != nil) - rd int64 // read deadline + rd int64 // read deadline (a nanotime in the future, -1 when expired) wseq uintptr // protects from stale write timers - wg uintptr // pdReady, pdWait, G waiting for write or nil. Accessed atomically. wt timer // write deadline timer - wd int64 // write deadline + wd int64 // write deadline (a nanotime in the future, -1 when expired) self *pollDesc // storage for indirect interface. See (*pollDesc).makeArg. } +// pollInfo is the bits needed by netpollcheckerr, stored atomically, +// mostly duplicating state that is manipulated under lock in pollDesc. +// The one exception is the pollEventErr bit, which is maintained only +// in the pollInfo. +type pollInfo uint32 + +const ( + pollClosing = 1 << iota + pollEventErr + pollExpiredReadDeadline + pollExpiredWriteDeadline +) + +func (i pollInfo) closing() bool { return i&pollClosing != 0 } +func (i pollInfo) eventErr() bool { return i&pollEventErr != 0 } +func (i pollInfo) expiredReadDeadline() bool { return i&pollExpiredReadDeadline != 0 } +func (i pollInfo) expiredWriteDeadline() bool { return i&pollExpiredWriteDeadline != 0 } + +// info returns the pollInfo corresponding to pd. +func (pd *pollDesc) info() pollInfo { + return pollInfo(pd.atomicInfo.Load()) +} + +// publishInfo updates pd.atomicInfo (returned by pd.info) +// using the other values in pd. +// It must be called while holding pd.lock, +// and it must be called after changing anything +// that might affect the info bits. +// In practice this means after changing closing +// or changing rd or wd from < 0 to >= 0. +func (pd *pollDesc) publishInfo() { + var info uint32 + if pd.closing { + info |= pollClosing + } + if pd.rd < 0 { + info |= pollExpiredReadDeadline + } + if pd.wd < 0 { + info |= pollExpiredWriteDeadline + } + + // Set all of x except the pollEventErr bit. + x := pd.atomicInfo.Load() + for !pd.atomicInfo.CompareAndSwap(x, (x&pollEventErr)|info) { + x = pd.atomicInfo.Load() + } +} + +// setEventErr sets the result of pd.info().eventErr() to b. +func (pd *pollDesc) setEventErr(b bool) { + x := pd.atomicInfo.Load() + for (x&pollEventErr != 0) != b && !pd.atomicInfo.CompareAndSwap(x, x^pollEventErr) { + x = pd.atomicInfo.Load() + } +} + type pollCache struct { lock mutex first *pollDesc @@ -147,24 +215,25 @@ func poll_runtime_isPollServerDescriptor(fd uintptr) bool { func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) { pd := pollcache.alloc() lock(&pd.lock) - wg := atomic.Loaduintptr(&pd.wg) + wg := pd.wg.Load() if wg != 0 && wg != pdReady { throw("runtime: blocked write on free polldesc") } - rg := atomic.Loaduintptr(&pd.rg) + rg := pd.rg.Load() if rg != 0 && rg != pdReady { throw("runtime: blocked read on free polldesc") } pd.fd = fd pd.closing = false - pd.everr = false + pd.setEventErr(false) pd.rseq++ - atomic.Storeuintptr(&pd.rg, 0) + pd.rg.Store(0) pd.rd = 0 pd.wseq++ - atomic.Storeuintptr(&pd.wg, 0) + pd.wg.Store(0) pd.wd = 0 pd.self = pd + pd.publishInfo() unlock(&pd.lock) errno := netpollopen(fd, pd) @@ -180,11 +249,11 @@ func poll_runtime_pollClose(pd *pollDesc) { if !pd.closing { throw("runtime: close polldesc w/o unblock") } - wg := atomic.Loaduintptr(&pd.wg) + wg := pd.wg.Load() if wg != 0 && wg != pdReady { throw("runtime: blocked write on closing polldesc") } - rg := atomic.Loaduintptr(&pd.rg) + rg := pd.rg.Load() if rg != 0 && rg != pdReady { throw("runtime: blocked read on closing polldesc") } @@ -209,9 +278,9 @@ func poll_runtime_pollReset(pd *pollDesc, mode int) int { return errcode } if mode == 'r' { - atomic.Storeuintptr(&pd.rg, 0) + pd.rg.Store(0) } else if mode == 'w' { - atomic.Storeuintptr(&pd.wg, 0) + pd.wg.Store(0) } return pollNoError } @@ -273,6 +342,7 @@ func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) { if mode == 'w' || mode == 'r'+'w' { pd.wd = d } + pd.publishInfo() combo := pd.rd > 0 && pd.rd == pd.wd rtf := netpollReadDeadline if combo { @@ -314,15 +384,13 @@ func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) { } } // If we set the new deadline in the past, unblock currently pending IO if any. + // Note that pd.publishInfo has already been called, above, immediately after modifying rd and wd. var rg, wg *g - if pd.rd < 0 || pd.wd < 0 { - atomic.StorepNoWB(noescape(unsafe.Pointer(&wg)), nil) // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock - if pd.rd < 0 { - rg = netpollunblock(pd, 'r', false) - } - if pd.wd < 0 { - wg = netpollunblock(pd, 'w', false) - } + if pd.rd < 0 { + rg = netpollunblock(pd, 'r', false) + } + if pd.wd < 0 { + wg = netpollunblock(pd, 'w', false) } unlock(&pd.lock) if rg != nil { @@ -343,7 +411,7 @@ func poll_runtime_pollUnblock(pd *pollDesc) { pd.rseq++ pd.wseq++ var rg, wg *g - atomic.StorepNoWB(noescape(unsafe.Pointer(&rg)), nil) // full memory barrier between store to closing and read of rg/wg in netpollunblock + pd.publishInfo() rg = netpollunblock(pd, 'r', false) wg = netpollunblock(pd, 'w', false) if pd.rt.f != nil { @@ -388,16 +456,17 @@ func netpollready(toRun *gList, pd *pollDesc, mode int32) { } func netpollcheckerr(pd *pollDesc, mode int32) int { - if pd.closing { + info := pd.info() + if info.closing() { return pollErrClosing } - if (mode == 'r' && pd.rd < 0) || (mode == 'w' && pd.wd < 0) { + if (mode == 'r' && info.expiredReadDeadline()) || (mode == 'w' && info.expiredWriteDeadline()) { return pollErrTimeout } // Report an event scanning error only on a read event. // An error on a write event will be captured in a subsequent // write call that is able to report a more specific error. - if mode == 'r' && pd.everr { + if mode == 'r' && info.eventErr() { return pollErrNotPollable } return pollNoError @@ -432,28 +501,28 @@ func netpollblock(pd *pollDesc, mode int32, waitio bool) bool { // set the gpp semaphore to pdWait for { // Consume notification if already ready. - if atomic.Casuintptr(gpp, pdReady, 0) { + if gpp.CompareAndSwap(pdReady, 0) { return true } - if atomic.Casuintptr(gpp, 0, pdWait) { + if gpp.CompareAndSwap(0, pdWait) { break } // Double check that this isn't corrupt; otherwise we'd loop // forever. - if v := atomic.Loaduintptr(gpp); v != pdReady && v != 0 { + if v := gpp.Load(); v != pdReady && v != 0 { throw("runtime: double wait") } } // need to recheck error states after setting gpp to pdWait // this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl - // do the opposite: store to closing/rd/wd, membarrier, load of rg/wg + // do the opposite: store to closing/rd/wd, publishInfo, load of rg/wg if waitio || netpollcheckerr(pd, mode) == pollNoError { gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5) } // be careful to not lose concurrent pdReady notification - old := atomic.Xchguintptr(gpp, 0) + old := gpp.Swap(0) if old > pdWait { throw("runtime: corrupted polldesc") } @@ -467,7 +536,7 @@ func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g { } for { - old := atomic.Loaduintptr(gpp) + old := gpp.Load() if old == pdReady { return nil } @@ -480,7 +549,7 @@ func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g { if ioready { new = pdReady } - if atomic.Casuintptr(gpp, old, new) { + if gpp.CompareAndSwap(old, new) { if old == pdWait { old = 0 } @@ -508,7 +577,7 @@ func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) { throw("runtime: inconsistent read deadline") } pd.rd = -1 - atomic.StorepNoWB(unsafe.Pointer(&pd.rt.f), nil) // full memory barrier between store to rd and load of rg in netpollunblock + pd.publishInfo() rg = netpollunblock(pd, 'r', false) } var wg *g @@ -517,7 +586,7 @@ func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) { throw("runtime: inconsistent write deadline") } pd.wd = -1 - atomic.StorepNoWB(unsafe.Pointer(&pd.wt.f), nil) // full memory barrier between store to wd and load of wg in netpollunblock + pd.publishInfo() wg = netpollunblock(pd, 'w', false) } unlock(&pd.lock) diff --git a/src/runtime/netpoll_aix.go b/src/runtime/netpoll_aix.go index 4590ed81a60c09..90950af4447d9c 100644 --- a/src/runtime/netpoll_aix.go +++ b/src/runtime/netpoll_aix.go @@ -212,10 +212,7 @@ retry: pfd.events &= ^_POLLOUT } if mode != 0 { - pds[i].everr = false - if pfd.revents == _POLLERR { - pds[i].everr = true - } + pds[i].setEventErr(pfd.revents == _POLLERR) netpollready(&toRun, pds[i], mode) n-- } diff --git a/src/runtime/netpoll_epoll.go b/src/runtime/netpoll_epoll.go index e0fb877d50858b..b7d61999659a1e 100644 --- a/src/runtime/netpoll_epoll.go +++ b/src/runtime/netpoll_epoll.go @@ -168,10 +168,7 @@ retry: } if mode != 0 { pd := *(**pollDesc)(unsafe.Pointer(&ev.data)) - pd.everr = false - if ev.events == _EPOLLERR { - pd.everr = true - } + pd.setEventErr(ev.events == _EPOLLERR) netpollready(&toRun, pd, mode) } } diff --git a/src/runtime/netpoll_kqueue.go b/src/runtime/netpoll_kqueue.go index 2f7f2848d296a6..1694753b6fe0db 100644 --- a/src/runtime/netpoll_kqueue.go +++ b/src/runtime/netpoll_kqueue.go @@ -179,10 +179,7 @@ retry: } if mode != 0 { pd := (*pollDesc)(unsafe.Pointer(ev.udata)) - pd.everr = false - if ev.flags == _EV_ERROR { - pd.everr = true - } + pd.setEventErr(ev.flags == _EV_ERROR) netpollready(&toRun, pd, mode) } } diff --git a/src/runtime/netpoll_solaris.go b/src/runtime/netpoll_solaris.go index d217d5b1604ae7..6e545b3d31b283 100644 --- a/src/runtime/netpoll_solaris.go +++ b/src/runtime/netpoll_solaris.go @@ -158,7 +158,7 @@ func netpollclose(fd uintptr) int32 { // this call, port_getn will return one and only one event for that // particular descriptor, so this function needs to be called again. func netpollupdate(pd *pollDesc, set, clear uint32) { - if pd.closing { + if pd.info().closing() { return } diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go index ec34e7ac1ae873..d0b7a162d59094 100644 --- a/src/runtime/runtime2.go +++ b/src/runtime/runtime2.go @@ -255,6 +255,8 @@ func efaceOf(ep *any) *eface { // so I can't see them ever moving. If we did want to start moving data // in the GC, we'd need to allocate the goroutine structs from an // alternate arena. Using guintptr doesn't make that problem any worse. +// Note that pollDesc.rg, pollDesc.wg also store g in uintptr form, +// so they would need to be updated too if g's start moving. type guintptr uintptr //go:nosplit