Skip to content

perf(recover): replace bytes with strings in function for better performance#4252

Merged
appleboy merged 1 commit intogin-gonic:masterfrom
1911860538:feat/recovery
May 31, 2025
Merged

perf(recover): replace bytes with strings in function for better performance#4252
appleboy merged 1 commit intogin-gonic:masterfrom
1911860538:feat/recovery

Conversation

@1911860538
Copy link
Copy Markdown
Contributor

Below is my local test code.

recover/original.go

package recover

import (
    "bytes"
    "fmt"
    "os"
    "runtime"
)

var (
    dunno     = []byte("???")
    centerDot = []byte("·")
    dot       = []byte(".")
    slash     = []byte("/")
)

// stack returns a nicely formatted stack frame, skipping skip frames.
func stack(skip int) []byte {
    buf := new(bytes.Buffer) // the returned data
    // As we loop, we open files and read them. These variables record the currently
    // loaded file.
    var lines [][]byte
    var lastFile string
    for i := skip; ; i++ { // Skip the expected number of frames
        pc, file, line, ok := runtime.Caller(i)
        if !ok {
            break
        }
        // Print this much at least.  If we can't find the source, it won't show.
        fmt.Fprintf(buf, "%s:%d (0x%x)\n", file, line, pc)
        if file != lastFile {
            data, err := os.ReadFile(file)
            if err != nil {
                continue
            }
            lines = bytes.Split(data, []byte{'\n'})
            lastFile = file
        }
        fmt.Fprintf(buf, "\t%s: %s\n", function(pc), source(lines, line))
    }
    return buf.Bytes()
}

// source returns a space-trimmed slice of the n'th line.
func source(lines [][]byte, n int) []byte {
    n-- // in stack trace, lines are 1-indexed but our array is 0-indexed
    if n < 0 || n >= len(lines) {
        return dunno
    }
    return bytes.TrimSpace(lines[n])
}

// function returns, if possible, the name of the function containing the PC.
func function(pc uintptr) []byte {
    fn := runtime.FuncForPC(pc)
    if fn == nil {
        return dunno
    }
    name := []byte(fn.Name())
    // The name includes the path name to the package, which is unnecessary
    // since the file name is already included.  Plus, it has center dots.
    // That is, we see
    //    runtime/debug.*T·ptrmethod
    // and want
    //    *T.ptrmethod
    // Also the package path might contain dot (e.g. code.google.com/...),
    // so first eliminate the path prefix
    if lastSlash := bytes.LastIndex(name, slash); lastSlash >= 0 {
        name = name[lastSlash+1:]
    }
    if period := bytes.Index(name, dot); period >= 0 {
        name = name[period+1:]
    }
    name = bytes.ReplaceAll(name, centerDot, dot)
    return name
}

recover/optimized.go

package recover

import (
    "bytes"
    "fmt"
    "os"
    "runtime"
    "strings"
)

const dunno1 = "???"

// stack returns a nicely formatted stack frame, skipping skip frames.
func stack1(skip int) []byte {
    buf := new(bytes.Buffer) // the returned data
    // As we loop, we open files and read them. These variables record the currently
    // loaded file.
    var lines [][]byte
    var lastFile string
    for i := skip; ; i++ { // Skip the expected number of frames
        pc, file, line, ok := runtime.Caller(i)
        if !ok {
            break
        }
        // Print this much at least.  If we can't find the source, it won't show.
        fmt.Fprintf(buf, "%s:%d (0x%x)\n", file, line, pc)
        if file != lastFile {
            data, err := os.ReadFile(file)
            if err != nil {
                continue
            }
            lines = bytes.Split(data, []byte{'\n'})
            lastFile = file
        }
        fmt.Fprintf(buf, "\t%s: %s\n", function1(pc), source(lines, line))
    }
    return buf.Bytes()
}

// function returns, if possible, the name of the function containing the PC.
func function1(pc uintptr) string {
    fn := runtime.FuncForPC(pc)
    if fn == nil {
        return dunno1
    }
    name := fn.Name()
    // The name includes the path name to the package, which is unnecessary
    // since the file name is already included.  Plus, it has center dots.
    // That is, we see
    //    runtime/debug.*T·ptrmethod
    // and want
    //    *T.ptrmethod
    // Also the package path might contain dot (e.g. code.google.com/...),
    // so first eliminate the path prefix
    if lastSlash := strings.LastIndexByte(name, '/'); lastSlash >= 0 {
        name = name[lastSlash+1:]
    }
    if period := strings.IndexByte(name, '.'); period >= 0 {
        name = name[period+1:]
    }
    name = strings.ReplaceAll(name, "·", ".")
    return name
}

recover/function_name.go

package recover

import (
    "bytes"
    "strings"
)

func fnName(nameStr string) []byte {
    name := []byte(nameStr)
    if lastSlash := bytes.LastIndex(name, slash); lastSlash >= 0 {
        name = name[lastSlash+1:]
    }
    if period := bytes.Index(name, dot); period >= 0 {
        name = name[period+1:]
    }
    name = bytes.ReplaceAll(name, centerDot, dot)
    return name
}

func fnName1(name string) string {
    if lastSlash := strings.LastIndexByte(name, '/'); lastSlash >= 0 {
        name = name[lastSlash+1:]
    }
    if period := strings.IndexByte(name, '.'); period >= 0 {
        name = name[period+1:]
    }
    name = strings.ReplaceAll(name, "·", ".")
    return name
}

recover/stack_test.go

package recover

import (
    "runtime"
    "testing"
)

func BenchmarkFnName(b *testing.B) {
    name := "a·b.c/d"

    b.Run("original", func(b *testing.B) {
        b.ReportAllocs()

        for i := 0; i < b.N; i++ {
            fnName(name)
        }
    })

    b.Run("optimized", func(b *testing.B) {
        b.ReportAllocs()

        for i := 0; i < b.N; i++ {
            fnName1(name)
        }
    })
}

func BenchmarkFunction(b *testing.B) {
    pc, _, _, _ := runtime.Caller(2)

    b.Run("original", func(b *testing.B) {
        b.ReportAllocs()

        for i := 0; i < b.N; i++ {
            _ = function(pc)
        }
    })

    b.Run("optimized", func(b *testing.B) {
        b.ReportAllocs()

        for i := 0; i < b.N; i++ {
            _ = function1(pc)
        }
    })
}

func BenchmarkStack(b *testing.B) {
    b.Run("original", func(b *testing.B) {
        b.ReportAllocs()

        for i := 0; i < b.N; i++ {
            _ = stack(2)
        }
    })

    b.Run("optimized", func(b *testing.B) {
        b.ReportAllocs()

        for i := 0; i < b.N; i++ {
            _ = stack1(2)
        }
    })
}

benchmark output

goos: darwin
goarch: arm64
pkg: test_code/test_gin/recover
cpu: Apple M3 Pro
BenchmarkFnName
BenchmarkFnName/original
BenchmarkFnName/original-11         	38319324	        29.97 ns/op	      16 B/op	       2 allocs/op
BenchmarkFnName/optimized
BenchmarkFnName/optimized-11        	160470892	         7.158 ns/op	       0 B/op	       0 allocs/op
BenchmarkFunction
BenchmarkFunction/original
BenchmarkFunction/original-11       	16994358	        71.61 ns/op	      40 B/op	       2 allocs/op
BenchmarkFunction/optimized
BenchmarkFunction/optimized-11      	28404208	        40.54 ns/op	       0 B/op	       0 allocs/op
BenchmarkStack
BenchmarkStack/original
BenchmarkStack/original-11          	   18630	     67586 ns/op	  146613 B/op	      43 allocs/op
BenchmarkStack/optimized
BenchmarkStack/optimized-11         	   17376	     61093 ns/op	  146483 B/op	      37 allocs/op
PASS

Process finished with the exit code 0

@codecov
Copy link
Copy Markdown

codecov Bot commented May 28, 2025

Codecov Report

All modified and coverable lines are covered by tests ✅

Project coverage is 98.92%. Comparing base (3dc1cd6) to head (b8f57f8).
Report is 128 commits behind head on master.

Additional details and impacted files
@@            Coverage Diff             @@
##           master    #4252      +/-   ##
==========================================
- Coverage   99.21%   98.92%   -0.30%     
==========================================
  Files          42       44       +2     
  Lines        3182     3432     +250     
==========================================
+ Hits         3157     3395     +238     
- Misses         17       26       +9     
- Partials        8       11       +3     
Flag Coverage Δ
?
--ldflags="-checklinkname=0" -tags sonic 98.85% <100.00%> (?)
-tags go_json 98.85% <100.00%> (?)
-tags nomsgpack 98.90% <100.00%> (?)
go-1.18 ?
go-1.19 ?
go-1.20 ?
go-1.21 ?
go-1.23 98.92% <100.00%> (?)
go-1.24 98.92% <100.00%> (?)
macos-latest 98.92% <100.00%> (-0.30%) ⬇️
ubuntu-latest 98.92% <100.00%> (-0.30%) ⬇️

Flags with carried forward coverage won't be shown. Click here to find out more.

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

🚀 New features to boost your workflow:
  • ❄️ Test Analytics: Detect flaky tests, report on failures, and find test suite problems.

@appleboy appleboy changed the title perf: replace bytes with strings in function for better performance perf(recover): replace bytes with strings in function for better performance May 31, 2025
@appleboy appleboy added this to the v1.11 milestone May 31, 2025
@appleboy appleboy merged commit 3c12d2a into gin-gonic:master May 31, 2025
24 of 25 checks passed
@1911860538
Copy link
Copy Markdown
Contributor Author

I used the load testing tool hey to compare the performance of CustomRecoveryWithWriter before and after my optimization.

test code

package main

import (
	"log"
	"net/http"

	"github.com/gin-gonic/gin"
)

func main() {
	gin.SetMode(gin.ReleaseMode)

	r := gin.New()

	r.Use(gin.CustomRecoveryWithWriter(gin.DefaultErrorWriter, func(c *gin.Context, err any) {
		c.JSON(http.StatusInternalServerError, gin.H{
			"message": "internal error",
		})
	}))

	r.GET("/panic", func(c *gin.Context) {
		panic("trigger panic")
	})

	err := r.Run(":8080")
	if err != nil {
		log.Fatal(err)
	}
}

hey command

hey install

brew install hey

hey run

hey -n 100000 -c 100 http://localhost:8080/panic

benchmark results before optimization using hey

hey -n 100000 -c 100 http://localhost:8080/panic

Summary:
  Total:        26.3267 secs
  Slowest:      0.3569 secs
  Fastest:      0.0005 secs
  Average:      0.0256 secs
  Requests/sec: 3798.4205

  Total data:   3200000 bytes
  Size/request: 32 bytes

Response time histogram:
  0.001 [1]     |
  0.036 [71640] |■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
  0.072 [18683] |■■■■■■■■■■
  0.107 [6305]  |■■■■
  0.143 [2242]  |■
  0.179 [755]   |
  0.214 [257]   |
  0.250 [78]    |
  0.286 [26]    |
  0.321 [9]     |
  0.357 [4]     |


Latency distribution:
  10% in 0.0013 secs
  25% in 0.0025 secs
  50% in 0.0061 secs
  75% in 0.0393 secs
  90% in 0.0707 secs
  95% in 0.0945 secs
  99% in 0.1465 secs

Details (average, fastest, slowest):
  DNS+dialup:   0.0000 secs, 0.0005 secs, 0.3569 secs
  DNS-lookup:   0.0000 secs, 0.0000 secs, 0.0083 secs
  req write:    0.0000 secs, 0.0000 secs, 0.0106 secs
  resp wait:    0.0256 secs, 0.0005 secs, 0.3568 secs
  resp read:    0.0000 secs, 0.0000 secs, 0.0146 secs

Status code distribution:
  [500] 100000 responses

benchmark results after optimization using hey

hey -n 100000 -c 100 http://localhost:8080/panic

Summary:
  Total:        25.3953 secs
  Slowest:      0.4092 secs
  Fastest:      0.0006 secs
  Average:      0.0246 secs
  Requests/sec: 3937.7382

  Total data:   3200000 bytes
  Size/request: 32 bytes

Response time histogram:
  0.001 [1]     |
  0.041 [78627] |■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
  0.082 [14995] |■■■■■■■■
  0.123 [4648]  |■■
  0.164 [1289]  |■
  0.205 [328]   |
  0.246 [82]    |
  0.287 [23]    |
  0.327 [5]     |
  0.368 [1]     |
  0.409 [1]     |


Latency distribution:
  10% in 0.0013 secs
  25% in 0.0025 secs
  50% in 0.0059 secs
  75% in 0.0376 secs
  90% in 0.0682 secs
  95% in 0.0901 secs
  99% in 0.1394 secs

Details (average, fastest, slowest):
  DNS+dialup:   0.0000 secs, 0.0006 secs, 0.4092 secs
  DNS-lookup:   0.0000 secs, 0.0000 secs, 0.0060 secs
  req write:    0.0000 secs, 0.0000 secs, 0.0047 secs
  resp wait:    0.0245 secs, 0.0005 secs, 0.4091 secs
  resp read:    0.0000 secs, 0.0000 secs, 0.0170 secs

Status code distribution:
  [500] 100000 responses

@appleboy
Copy link
Copy Markdown
Member

@1911860538 LGTM. Thanks for your benchmark report.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants