From 7c404d59db3591a7c5854b38dc0f05fcb7ac0cff Mon Sep 17 00:00:00 2001
From: Michael Anthony Knyszek
Date: Tue, 3 May 2022 19:28:25 +0000
Subject: [PATCH] runtime: store consistent total allocation stats as uint64

Currently the consistent total allocation stats are managed as uintptrs,
which means they can easily overflow on 32-bit systems. Fix this by
storing these stats as uint64s. This will cause some minor performance
degradation on 32-bit systems, but there really isn't a way around this,
and it affects the correctness of the metrics we export.

Fixes #52680.

Change-Id: I7e6ca44047d46b4bd91c6f87c2d29f730e0d6191
Reviewed-on: https://go-review.googlesource.com/c/go/+/403758
Run-TryBot: Michael Knyszek
TryBot-Result: Gopher Robot
Auto-Submit: Michael Knyszek
Reviewed-by: Austin Clements
---
 src/runtime/mcache.go   | 20 ++++++++++----------
 src/runtime/metrics.go  | 12 ++++++------
 src/runtime/mgcsweep.go |  6 +++---
 src/runtime/mstats.go   | 42 ++++++++++++++++++++++--------------------
 4 files changed, 41 insertions(+), 39 deletions(-)

diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index 5a74431ff4c3eb..7c785900db7c45 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -159,18 +159,18 @@ func (c *mcache) refill(spc spanClass) {
 
 	// Count up how many slots were used and record it.
 	stats := memstats.heapStats.acquire()
-	slotsUsed := uintptr(s.allocCount) - uintptr(s.allocCountBeforeCache)
-	atomic.Xadduintptr(&stats.smallAllocCount[spc.sizeclass()], slotsUsed)
+	slotsUsed := int64(s.allocCount) - int64(s.allocCountBeforeCache)
+	atomic.Xadd64(&stats.smallAllocCount[spc.sizeclass()], slotsUsed)
 
 	// Flush tinyAllocs.
 	if spc == tinySpanClass {
-		atomic.Xadduintptr(&stats.tinyAllocCount, c.tinyAllocs)
+		atomic.Xadd64(&stats.tinyAllocCount, int64(c.tinyAllocs))
 		c.tinyAllocs = 0
 	}
 	memstats.heapStats.release()
 
 	// Count the allocs in inconsistent, internal stats.
-	bytesAllocated := int64(slotsUsed * s.elemsize)
+	bytesAllocated := slotsUsed * int64(s.elemsize)
 	gcController.totalAlloc.Add(bytesAllocated)
 
 	// Update heapLive and flush scanAlloc.
@@ -224,8 +224,8 @@ func (c *mcache) allocLarge(size uintptr, noscan bool) *mspan {
 
 	// Count the alloc in consistent, external stats.
 	stats := memstats.heapStats.acquire()
-	atomic.Xadduintptr(&stats.largeAlloc, npages*pageSize)
-	atomic.Xadduintptr(&stats.largeAllocCount, 1)
+	atomic.Xadd64(&stats.largeAlloc, int64(npages*pageSize))
+	atomic.Xadd64(&stats.largeAllocCount, 1)
 	memstats.heapStats.release()
 
 	// Count the alloc in inconsistent, internal stats.
@@ -250,17 +250,17 @@ func (c *mcache) releaseAll() {
 	for i := range c.alloc {
 		s := c.alloc[i]
 		if s != &emptymspan {
-			slotsUsed := uintptr(s.allocCount) - uintptr(s.allocCountBeforeCache)
+			slotsUsed := int64(s.allocCount) - int64(s.allocCountBeforeCache)
 			s.allocCountBeforeCache = 0
 
 			// Adjust smallAllocCount for whatever was allocated.
 			stats := memstats.heapStats.acquire()
-			atomic.Xadduintptr(&stats.smallAllocCount[spanClass(i).sizeclass()], slotsUsed)
+			atomic.Xadd64(&stats.smallAllocCount[spanClass(i).sizeclass()], slotsUsed)
 			memstats.heapStats.release()
 
 			// Adjust the actual allocs in inconsistent, internal stats.
 			// We assumed earlier that the full span gets allocated.
-			gcController.totalAlloc.Add(int64(slotsUsed * s.elemsize))
+			gcController.totalAlloc.Add(slotsUsed * int64(s.elemsize))
 
 			// Release the span to the mcentral.
 			mheap_.central[i].mcentral.uncacheSpan(s)
@@ -273,7 +273,7 @@ func (c *mcache) releaseAll() {
 
 	// Flush tinyAllocs.
 	stats := memstats.heapStats.acquire()
-	atomic.Xadduintptr(&stats.tinyAllocCount, c.tinyAllocs)
+	atomic.Xadd64(&stats.tinyAllocCount, int64(c.tinyAllocs))
 	c.tinyAllocs = 0
 	memstats.heapStats.release()
 
diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go
index 763863e3584a5b..1b29f82b6475e9 100644
--- a/src/runtime/metrics.go
+++ b/src/runtime/metrics.go
@@ -388,13 +388,13 @@ func (a *heapStatsAggregate) compute() {
 	memstats.heapStats.read(&a.heapStatsDelta)
 
 	// Calculate derived stats.
-	a.totalAllocs = uint64(a.largeAllocCount)
-	a.totalFrees = uint64(a.largeFreeCount)
-	a.totalAllocated = uint64(a.largeAlloc)
-	a.totalFreed = uint64(a.largeFree)
+	a.totalAllocs = a.largeAllocCount
+	a.totalFrees = a.largeFreeCount
+	a.totalAllocated = a.largeAlloc
+	a.totalFreed = a.largeFree
 	for i := range a.smallAllocCount {
-		na := uint64(a.smallAllocCount[i])
-		nf := uint64(a.smallFreeCount[i])
+		na := a.smallAllocCount[i]
+		nf := a.smallFreeCount[i]
 		a.totalAllocs += na
 		a.totalFrees += nf
 		a.totalAllocated += na * uint64(class_to_size[i])
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index 698b7bff314086..de57f18c4ff607 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -668,7 +668,7 @@ func (sl *sweepLocked) sweep(preserve bool) bool {
 		// free slots zeroed.
 		s.needzero = 1
 		stats := memstats.heapStats.acquire()
-		atomic.Xadduintptr(&stats.smallFreeCount[spc.sizeclass()], uintptr(nfreed))
+		atomic.Xadd64(&stats.smallFreeCount[spc.sizeclass()], int64(nfreed))
 		memstats.heapStats.release()
 
 		// Count the frees in the inconsistent, internal stats.
@@ -720,8 +720,8 @@ func (sl *sweepLocked) sweep(preserve bool) bool {
 
 		// Count the free in the consistent, external stats.
 		stats := memstats.heapStats.acquire()
-		atomic.Xadduintptr(&stats.largeFreeCount, 1)
-		atomic.Xadduintptr(&stats.largeFree, size)
+		atomic.Xadd64(&stats.largeFreeCount, 1)
+		atomic.Xadd64(&stats.largeFree, int64(size))
 		memstats.heapStats.release()
 
 		// Count the free in the inconsistent, internal stats.
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go
index f4b2da03fc40bc..0029ea956c5afe 100644
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -7,7 +7,6 @@
 package runtime
 
 import (
-	"internal/goarch"
 	"runtime/internal/atomic"
 	"unsafe"
 )
@@ -388,10 +387,10 @@ func readmemstats_m(stats *MemStats) {
 	memstats.heapStats.unsafeRead(&consStats)
 
 	// Collect large allocation stats.
-	totalAlloc := uint64(consStats.largeAlloc)
-	nMalloc := uint64(consStats.largeAllocCount)
-	totalFree := uint64(consStats.largeFree)
-	nFree := uint64(consStats.largeFreeCount)
+	totalAlloc := consStats.largeAlloc
+	nMalloc := consStats.largeAllocCount
+	totalFree := consStats.largeFree
+	nFree := consStats.largeFreeCount
 
 	// Collect per-sizeclass stats.
 	var bySize [_NumSizeClasses]struct {
@@ -403,13 +402,13 @@
 		bySize[i].Size = uint32(class_to_size[i])
 
 		// Malloc stats.
-		a := uint64(consStats.smallAllocCount[i])
+		a := consStats.smallAllocCount[i]
 		totalAlloc += a * uint64(class_to_size[i])
 		nMalloc += a
 		bySize[i].Mallocs = a
 
 		// Free stats.
-		f := uint64(consStats.smallFreeCount[i])
+		f := consStats.smallFreeCount[i]
 		totalFree += f * uint64(class_to_size[i])
 		nFree += f
 		bySize[i].Frees = f
@@ -421,8 +420,8 @@
 	// memory in some sense because their tiny allocation block is also
 	// counted. Tracking the lifetime of individual tiny allocations is
 	// currently not done because it would be too expensive.
-	nFree += uint64(consStats.tinyAllocCount)
-	nMalloc += uint64(consStats.tinyAllocCount)
+	nFree += consStats.tinyAllocCount
+	nMalloc += consStats.tinyAllocCount
 
 	// Calculate derived stats.
@@ -663,17 +662,20 @@ type heapStatsDelta struct {
 	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits
 
 	// Allocator stats.
-	tinyAllocCount  uintptr                  // number of tiny allocations
-	largeAlloc      uintptr                  // bytes allocated for large objects
-	largeAllocCount uintptr                  // number of large object allocations
-	smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects
-	largeFree       uintptr                  // bytes freed for large objects (>maxSmallSize)
-	largeFreeCount  uintptr                  // number of frees for large objects (>maxSmallSize)
-	smallFreeCount  [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize)
-
-	// Add a uint32 to ensure this struct is a multiple of 8 bytes in size.
-	// Only necessary on 32-bit platforms.
-	_ [(goarch.PtrSize / 4) % 2]uint32
+	//
+	// These are all uint64 because they're cumulative, and could quickly wrap
+	// around otherwise.
+	tinyAllocCount  uint64                  // number of tiny allocations
+	largeAlloc      uint64                  // bytes allocated for large objects
+	largeAllocCount uint64                  // number of large object allocations
+	smallAllocCount [_NumSizeClasses]uint64 // number of allocs for small objects
+	largeFree       uint64                  // bytes freed for large objects (>maxSmallSize)
+	largeFreeCount  uint64                  // number of frees for large objects (>maxSmallSize)
+	smallFreeCount  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxSmallSize)
+
+	// NOTE: This struct must be a multiple of 8 bytes in size because it
+	// is stored in an array. If it's not, atomic accesses to the above
+	// fields may be unaligned and fail on 32-bit platforms.
 }
 
 // merge adds in the deltas from b into a.
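
For readers who want to see the overflow the commit message describes, here is a
standalone sketch. It is not runtime code: uint32 stands in for a 32-bit uintptr,
and the 1 MiB allocation size is made up. It shows how a cumulative byte counter
kept in a 32-bit word wraps once total allocation passes 4 GiB, while a uint64
counter keeps counting.

package main

import "fmt"

func main() {
	// Simulate a cumulative "bytes allocated" stat. On 32-bit systems a
	// uintptr is 32 bits wide, so it behaves like the uint32 below.
	var allocated32 uint32
	var allocated64 uint64

	const allocSize = 1 << 20    // pretend every allocation is 1 MiB
	for i := 0; i < 5<<10; i++ { // 5120 allocations, 5 GiB in total
		allocated32 += allocSize
		allocated64 += allocSize
	}

	fmt.Println("32-bit counter:", allocated32) // 1073741824: wrapped past 4 GiB
	fmt.Println("64-bit counter:", allocated64) // 5368709120: correct total
}

A long-running program on a 32-bit platform can allocate far more than 4 GiB over
its lifetime even though it never holds that much at once, so cumulative counters
like these need to be 64 bits wide to stay correct.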
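
The NOTE added to heapStatsDelta is about alignment rather than overflow: 64-bit
atomic operations require 8-byte-aligned addresses on 32-bit platforms, and
heapStatsDelta is stored in an array, so every element (not just the first) must
keep its uint64 fields aligned, which holds only if the struct's size is a
multiple of 8 bytes. The sketch below illustrates the same constraint with an
invented counters struct and the public sync/atomic package; the runtime itself
uses runtime/internal/atomic.

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

// counters stands in for a stats struct stored in an array. If its size were
// not a multiple of 8 bytes, the hits field of odd-indexed elements would sit
// on a 4-byte boundary on 32-bit platforms, and 64-bit atomics on it could
// fault. The explicit padding field keeps the size at 16 everywhere.
type counters struct {
	hits  uint64
	flags uint32
	_     uint32 // padding: keeps unsafe.Sizeof(counters{}) a multiple of 8
}

// A package-level variable: per the sync/atomic documentation, its first word
// can be relied upon to be 64-bit aligned.
var arr [4]counters

func main() {
	fmt.Println("element size:", unsafe.Sizeof(arr[0])) // 16

	// Because the element size is a multiple of 8, arr[i].hits is 8-byte
	// aligned for every i, so these adds are safe even on 386 or 32-bit ARM.
	for i := range arr {
		atomic.AddUint64(&arr[i].hits, 1)
	}
	fmt.Println("hits:", arr[0].hits, arr[1].hits, arr[2].hits, arr[3].hits)
}

The removed `_ [(goarch.PtrSize / 4) % 2]uint32` field served the same purpose
while the stats were uintptrs: on 32-bit platforms the uintptr fields came to an
odd number of 4-byte words, so one extra uint32 of padding was needed. With every
field now uint64 the size is a multiple of 8 without padding, which is why the
field and the internal/goarch import can both go.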