From 7c404d59db3591a7c5854b38dc0f05fcb7ac0cff Mon Sep 17 00:00:00 2001
From: Michael Anthony Knyszek
Date: Tue, 3 May 2022 19:28:25 +0000
Subject: [PATCH] runtime: store consistent total allocation stats as uint64

Currently the consistent total allocation stats are managed as uintptrs,
which means they can easily overflow on 32-bit systems. Fix this by
storing these stats as uint64s. This will cause some minor performance
degradation on 32-bit systems, but there really isn't a way around this,
and it affects the correctness of the metrics we export.

Fixes #52680.

Change-Id: I7e6ca44047d46b4bd91c6f87c2d29f730e0d6191
Reviewed-on: https://go-review.googlesource.com/c/go/+/403758
Run-TryBot: Michael Knyszek
TryBot-Result: Gopher Robot
Auto-Submit: Michael Knyszek
Reviewed-by: Austin Clements
---
 src/runtime/mcache.go   | 20 ++++++++++----------
 src/runtime/metrics.go  | 12 ++++++------
 src/runtime/mgcsweep.go |  6 +++---
 src/runtime/mstats.go   | 42 ++++++++++++++++++++++--------------------
 4 files changed, 41 insertions(+), 39 deletions(-)

diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index 5a74431ff4c3eb..7c785900db7c45 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -159,18 +159,18 @@ func (c *mcache) refill(spc spanClass) {
 
 	// Count up how many slots were used and record it.
 	stats := memstats.heapStats.acquire()
-	slotsUsed := uintptr(s.allocCount) - uintptr(s.allocCountBeforeCache)
-	atomic.Xadduintptr(&stats.smallAllocCount[spc.sizeclass()], slotsUsed)
+	slotsUsed := int64(s.allocCount) - int64(s.allocCountBeforeCache)
+	atomic.Xadd64(&stats.smallAllocCount[spc.sizeclass()], slotsUsed)
 
 	// Flush tinyAllocs.
 	if spc == tinySpanClass {
-		atomic.Xadduintptr(&stats.tinyAllocCount, c.tinyAllocs)
+		atomic.Xadd64(&stats.tinyAllocCount, int64(c.tinyAllocs))
 		c.tinyAllocs = 0
 	}
 	memstats.heapStats.release()
 
 	// Count the allocs in inconsistent, internal stats.
-	bytesAllocated := int64(slotsUsed * s.elemsize)
+	bytesAllocated := slotsUsed * int64(s.elemsize)
 	gcController.totalAlloc.Add(bytesAllocated)
 
 	// Update heapLive and flush scanAlloc.
@@ -224,8 +224,8 @@ func (c *mcache) allocLarge(size uintptr, noscan bool) *mspan {
 
 	// Count the alloc in consistent, external stats.
 	stats := memstats.heapStats.acquire()
-	atomic.Xadduintptr(&stats.largeAlloc, npages*pageSize)
-	atomic.Xadduintptr(&stats.largeAllocCount, 1)
+	atomic.Xadd64(&stats.largeAlloc, int64(npages*pageSize))
+	atomic.Xadd64(&stats.largeAllocCount, 1)
 	memstats.heapStats.release()
 
 	// Count the alloc in inconsistent, internal stats.
@@ -250,17 +250,17 @@ func (c *mcache) releaseAll() {
 	for i := range c.alloc {
 		s := c.alloc[i]
 		if s != &emptymspan {
-			slotsUsed := uintptr(s.allocCount) - uintptr(s.allocCountBeforeCache)
+			slotsUsed := int64(s.allocCount) - int64(s.allocCountBeforeCache)
 			s.allocCountBeforeCache = 0
 
 			// Adjust smallAllocCount for whatever was allocated.
 			stats := memstats.heapStats.acquire()
-			atomic.Xadduintptr(&stats.smallAllocCount[spanClass(i).sizeclass()], slotsUsed)
+			atomic.Xadd64(&stats.smallAllocCount[spanClass(i).sizeclass()], slotsUsed)
 			memstats.heapStats.release()
 
 			// Adjust the actual allocs in inconsistent, internal stats.
 			// We assumed earlier that the full span gets allocated.
-			gcController.totalAlloc.Add(int64(slotsUsed * s.elemsize))
+			gcController.totalAlloc.Add(slotsUsed * int64(s.elemsize))
 
 			// Release the span to the mcentral.
 			mheap_.central[i].mcentral.uncacheSpan(s)
@@ -273,7 +273,7 @@ func (c *mcache) releaseAll() {
 
 	// Flush tinyAllocs.
 	stats := memstats.heapStats.acquire()
-	atomic.Xadduintptr(&stats.tinyAllocCount, c.tinyAllocs)
+	atomic.Xadd64(&stats.tinyAllocCount, int64(c.tinyAllocs))
 	c.tinyAllocs = 0
 	memstats.heapStats.release()
 
diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go
index 763863e3584a5b..1b29f82b6475e9 100644
--- a/src/runtime/metrics.go
+++ b/src/runtime/metrics.go
@@ -388,13 +388,13 @@ func (a *heapStatsAggregate) compute() {
 	memstats.heapStats.read(&a.heapStatsDelta)
 
 	// Calculate derived stats.
-	a.totalAllocs = uint64(a.largeAllocCount)
-	a.totalFrees = uint64(a.largeFreeCount)
-	a.totalAllocated = uint64(a.largeAlloc)
-	a.totalFreed = uint64(a.largeFree)
+	a.totalAllocs = a.largeAllocCount
+	a.totalFrees = a.largeFreeCount
+	a.totalAllocated = a.largeAlloc
+	a.totalFreed = a.largeFree
 	for i := range a.smallAllocCount {
-		na := uint64(a.smallAllocCount[i])
-		nf := uint64(a.smallFreeCount[i])
+		na := a.smallAllocCount[i]
+		nf := a.smallFreeCount[i]
 		a.totalAllocs += na
 		a.totalFrees += nf
 		a.totalAllocated += na * uint64(class_to_size[i])
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index 698b7bff314086..de57f18c4ff607 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -668,7 +668,7 @@ func (sl *sweepLocked) sweep(preserve bool) bool {
 		// free slots zeroed.
 		s.needzero = 1
 		stats := memstats.heapStats.acquire()
-		atomic.Xadduintptr(&stats.smallFreeCount[spc.sizeclass()], uintptr(nfreed))
+		atomic.Xadd64(&stats.smallFreeCount[spc.sizeclass()], int64(nfreed))
 		memstats.heapStats.release()
 
 		// Count the frees in the inconsistent, internal stats.
@@ -720,8 +720,8 @@ func (sl *sweepLocked) sweep(preserve bool) bool {
 
 		// Count the free in the consistent, external stats.
 		stats := memstats.heapStats.acquire()
-		atomic.Xadduintptr(&stats.largeFreeCount, 1)
-		atomic.Xadduintptr(&stats.largeFree, size)
+		atomic.Xadd64(&stats.largeFreeCount, 1)
+		atomic.Xadd64(&stats.largeFree, int64(size))
 		memstats.heapStats.release()
 
 		// Count the free in the inconsistent, internal stats.
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go
index f4b2da03fc40bc..0029ea956c5afe 100644
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -7,7 +7,6 @@
 package runtime
 
 import (
-	"internal/goarch"
 	"runtime/internal/atomic"
 	"unsafe"
 )
@@ -388,10 +387,10 @@ func readmemstats_m(stats *MemStats) {
 	memstats.heapStats.unsafeRead(&consStats)
 
 	// Collect large allocation stats.
-	totalAlloc := uint64(consStats.largeAlloc)
-	nMalloc := uint64(consStats.largeAllocCount)
-	totalFree := uint64(consStats.largeFree)
-	nFree := uint64(consStats.largeFreeCount)
+	totalAlloc := consStats.largeAlloc
+	nMalloc := consStats.largeAllocCount
+	totalFree := consStats.largeFree
+	nFree := consStats.largeFreeCount
 
 	// Collect per-sizeclass stats.
 	var bySize [_NumSizeClasses]struct {
@@ -403,13 +402,13 @@
 		bySize[i].Size = uint32(class_to_size[i])
 
 		// Malloc stats.
-		a := uint64(consStats.smallAllocCount[i])
+		a := consStats.smallAllocCount[i]
 		totalAlloc += a * uint64(class_to_size[i])
 		nMalloc += a
 		bySize[i].Mallocs = a
 
 		// Free stats.
-		f := uint64(consStats.smallFreeCount[i])
+		f := consStats.smallFreeCount[i]
 		totalFree += f * uint64(class_to_size[i])
 		nFree += f
 		bySize[i].Frees = f
@@ -421,8 +420,8 @@
 	// memory in some sense because their tiny allocation block is also
 	// counted. Tracking the lifetime of individual tiny allocations is
 	// currently not done because it would be too expensive.
-	nFree += uint64(consStats.tinyAllocCount)
-	nMalloc += uint64(consStats.tinyAllocCount)
+	nFree += consStats.tinyAllocCount
+	nMalloc += consStats.tinyAllocCount
 
 	// Calculate derived stats.
@@ -663,17 +662,20 @@ type heapStatsDelta struct {
 	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits
 
 	// Allocator stats.
-	tinyAllocCount  uintptr                  // number of tiny allocations
-	largeAlloc      uintptr                  // bytes allocated for large objects
-	largeAllocCount uintptr                  // number of large object allocations
-	smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects
-	largeFree       uintptr                  // bytes freed for large objects (>maxSmallSize)
-	largeFreeCount  uintptr                  // number of frees for large objects (>maxSmallSize)
-	smallFreeCount  [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize)
-
-	// Add a uint32 to ensure this struct is a multiple of 8 bytes in size.
-	// Only necessary on 32-bit platforms.
-	_ [(goarch.PtrSize / 4) % 2]uint32
+	//
+	// These are all uint64 because they're cumulative, and could quickly wrap
+	// around otherwise.
+	tinyAllocCount  uint64                  // number of tiny allocations
+	largeAlloc      uint64                  // bytes allocated for large objects
+	largeAllocCount uint64                  // number of large object allocations
+	smallAllocCount [_NumSizeClasses]uint64 // number of allocs for small objects
+	largeFree       uint64                  // bytes freed for large objects (>maxSmallSize)
+	largeFreeCount  uint64                  // number of frees for large objects (>maxSmallSize)
+	smallFreeCount  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxSmallSize)
+
+	// NOTE: This struct must be a multiple of 8 bytes in size because it
+	// is stored in an array. If it's not, atomic accesses to the above
+	// fields may be unaligned and fail on 32-bit platforms.
 }
 
 // merge adds in the deltas from b into a.
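
For readers who want to see the overflow the commit message describes, here is a
standalone sketch. It is not runtime code: uint32 stands in for a 32-bit uintptr,
and the 1 MiB allocation size is made up. It shows how a cumulative byte counter
kept in a 32-bit word wraps once total allocation passes 4 GiB, while a uint64
counter keeps counting.

package main

import "fmt"

func main() {
	// Simulate a cumulative "bytes allocated" stat. On 32-bit systems a
	// uintptr is 32 bits wide, so it behaves like the uint32 below.
	var allocated32 uint32
	var allocated64 uint64

	const allocSize = 1 << 20    // pretend every allocation is 1 MiB
	for i := 0; i < 5<<10; i++ { // 5120 allocations, 5 GiB in total
		allocated32 += allocSize
		allocated64 += allocSize
	}

	fmt.Println("32-bit counter:", allocated32) // 1073741824: wrapped past 4 GiB
	fmt.Println("64-bit counter:", allocated64) // 5368709120: correct total
}

A long-running program on a 32-bit platform can allocate far more than 4 GiB over
its lifetime even though it never holds that much at once, so cumulative counters
like these need to be 64 bits wide to stay correct.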
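
The NOTE added to heapStatsDelta is about alignment rather than overflow: 64-bit
atomic operations require 8-byte-aligned addresses on 32-bit platforms, and
heapStatsDelta is stored in an array, so every element (not just the first) must
keep its uint64 fields aligned, which holds only if the struct's size is a
multiple of 8 bytes. The sketch below illustrates the same constraint with an
invented counters struct and the public sync/atomic package; the runtime itself
uses runtime/internal/atomic.

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

// counters stands in for a stats struct stored in an array. If its size were
// not a multiple of 8 bytes, the hits field of odd-indexed elements would sit
// on a 4-byte boundary on 32-bit platforms, and 64-bit atomics on it could
// fault. The explicit padding field keeps the size at 16 everywhere.
type counters struct {
	hits  uint64
	flags uint32
	_     uint32 // padding: keeps unsafe.Sizeof(counters{}) a multiple of 8
}

// A package-level variable: per the sync/atomic documentation, its first word
// can be relied upon to be 64-bit aligned.
var arr [4]counters

func main() {
	fmt.Println("element size:", unsafe.Sizeof(arr[0])) // 16

	// Because the element size is a multiple of 8, arr[i].hits is 8-byte
	// aligned for every i, so these adds are safe even on 386 or 32-bit ARM.
	for i := range arr {
		atomic.AddUint64(&arr[i].hits, 1)
	}
	fmt.Println("hits:", arr[0].hits, arr[1].hits, arr[2].hits, arr[3].hits)
}

The removed `_ [(goarch.PtrSize / 4) % 2]uint32` field served the same purpose
while the stats were uintptrs: on 32-bit platforms the uintptr fields came to an
odd number of 4-byte words, so one extra uint32 of padding was needed. With every
field now uint64 the size is a multiple of 8 without padding, which is why the
field and the internal/goarch import can both go.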