diff --git a/test/benchmark/suite/render.go b/test/benchmark/suite/render.go
index 413d02502d..20056860de 100644
--- a/test/benchmark/suite/render.go
+++ b/test/benchmark/suite/render.go
@@ -11,59 +11,14 @@ import (
"bytes"
"fmt"
"io"
+ "math"
"os"
- "strconv"
+ "path"
+ "sort"
"strings"
"text/tabwriter"
)
-const (
- omitEmptyValue = "-"
- benchmarkEnvPrefix = "BENCHMARK_"
-
- querySum = "Sum"
- queryAvg = "Avg"
- queryMin = "Min"
- queryMax = "Max"
-)
-
-type tableHeader struct {
- name string
- unit string
- promQL string // only valid for metrics table
- queryType string
-}
-
-var metricsTableHeader = []tableHeader{
- {
- name: "Test Name",
- },
- {
- name: "Envoy Gateway Memory",
- unit: "MiB",
- promQL: `process_resident_memory_bytes{namespace="envoy-gateway-system", control_plane="envoy-gateway"}/1024/1024`,
- queryType: querySum,
- },
- {
- name: "Envoy Gateway CPU",
- unit: "s",
- promQL: `process_cpu_seconds_total{namespace="envoy-gateway-system", control_plane="envoy-gateway"}`,
- queryType: querySum,
- },
- {
- name: "Envoy Proxy Memory (Avg)",
- unit: "MiB",
- promQL: `container_memory_working_set_bytes{namespace="envoy-gateway-system",container="envoy"}/1024/1024`,
- queryType: queryAvg,
- },
- {
- name: "Envoy Proxy CPU (Avg)",
- unit: "s",
- promQL: `container_cpu_usage_seconds_total{namespace="envoy-gateway-system",container="envoy"}`,
- queryType: queryAvg,
- },
-}
-
// RenderReport renders a report out of given list of benchmark report in Markdown format.
func RenderReport(writer io.Writer, name, description string, titleLevel int, reports []*BenchmarkReport) error {
writeSection(writer, "Test: "+name, titleLevel, description)
@@ -73,11 +28,12 @@ func RenderReport(writer io.Writer, name, description string, titleLevel int, re
return err
}
- writeSection(writer, "Metrics", titleLevel+1, "")
+ writeSection(writer, "Metrics", titleLevel+1,
+ "The CPU usage statistics of both control-plane and data-plane are the CPU usage per second over the past 30 seconds.")
renderMetricsTable(writer, reports)
writeSection(writer, "Profiles", titleLevel+1, renderProfilesNote())
- renderProfilesTable(writer, "Memory", "heap", titleLevel+2, reports)
+ renderProfilesTable(writer, "Heap", "heap", titleLevel+2, reports)
return nil
}
@@ -90,22 +46,23 @@ func newMarkdownStyleTableWriter(writer io.Writer) *tabwriter.Writer {
func renderEnvSettingsTable(writer io.Writer) {
table := newMarkdownStyleTableWriter(writer)
- headers := []tableHeader{
- {name: "RPS"},
- {name: "Connections"},
- {name: "Duration", unit: "s"},
- {name: "CPU Limits", unit: "m"},
- {name: "Memory Limits", unit: "MiB"},
+ headers := []string{
+ "RPS",
+ "Connections",
+ "Duration (Seconds)",
+ "CPU Limits (m)",
+ "Memory Limits (MiB)",
}
writeTableHeader(table, headers)
- writeTableRow(table, headers, func(_ int, h tableHeader) string {
- env := strings.ReplaceAll(strings.ToUpper(h.name), " ", "_")
- if v, ok := os.LookupEnv(benchmarkEnvPrefix + env); ok {
- return v
- }
- return omitEmptyValue
- })
+ data := []string{
+ os.Getenv("BENCHMARK_RPS"),
+ os.Getenv("BENCHMARK_CONNECTIONS"),
+ os.Getenv("BENCHMARK_DURATION"),
+ os.Getenv("BENCHMARK_CPU_LIMITS"),
+ os.Getenv("BENCHMARK_MEMORY_LIMITS"),
+ }
+ writeTableRow(table, data)
_ = table.Flush()
}
@@ -129,20 +86,20 @@ func renderResultsTable(writer io.Writer, reports []*BenchmarkReport) error {
func renderMetricsTable(writer io.Writer, reports []*BenchmarkReport) {
table := newMarkdownStyleTableWriter(writer)
- writeTableHeader(table, metricsTableHeader)
+ // write headers
+ headers := []string{
+ "Test Name",
+ "Envoy Gateway Memory (MiB) <br> min/max/means",
+ "Envoy Gateway CPU (%) <br> min/max/means",
+ "Averaged Envoy Proxy Memory (MiB) <br> min/max/means",
+ "Averaged Envoy Proxy CPU (%) <br> min/max/means",
+ }
+ writeTableHeader(table, headers)
for _, report := range reports {
- writeTableRow(table, metricsTableHeader, func(_ int, h tableHeader) string {
- if len(h.promQL) == 0 {
- return report.Name
- }
-
- if v, ok := report.Metrics[h.name]; ok {
- return strconv.FormatFloat(v, 'f', -1, 64)
- }
-
- return omitEmptyValue
- })
+ data := []string{report.Name}
+ data = append(data, getSamplesMinMaxMeans(report.Samples)...)
+ writeTableRow(table, data)
}
_ = table.Flush()
@@ -156,19 +113,32 @@ You can visualize them in a web page by running:
%s
Currently, the supported profile types are:
-- heap
+- heap (memory)
`, "`/profiles`", "`{ProfileType}.{TestCase}.pprof`", "```shell\ngo tool pprof -http=: path/to/your.pprof\n```")
}
func renderProfilesTable(writer io.Writer, target, key string, titleLevel int, reports []*BenchmarkReport) {
- writeSection(writer, target, titleLevel, "")
+ writeSection(writer, target, titleLevel,
+ "The profiles were sampled when Envoy Gateway Memory is at its maximum.")
for _, report := range reports {
+ // Get the heap profile when control plane memory is at its maximum.
+ sortedSamples := make([]BenchmarkMetricSample, len(report.Samples))
+ copy(sortedSamples, report.Samples)
+ sort.Slice(sortedSamples, func(i, j int) bool {
+ return sortedSamples[i].ControlPlaneMem > sortedSamples[j].ControlPlaneMem
+ })
+
+ heapPprof := sortedSamples[0].HeapProfile
+ heapPprofPath := path.Join(report.ProfilesOutputDir, fmt.Sprintf("heap.%s.pprof", report.Name))
+ _ = os.WriteFile(heapPprofPath, heapPprof, 0o600)
+
// The image is not be rendered yet, so it is a placeholder for the path.
// The image will be rendered after the test has finished.
+ rootDir := strings.SplitN(heapPprofPath, "/", 2)[0]
+ heapPprofPath = strings.TrimPrefix(heapPprofPath, rootDir+"/")
writeSection(writer, report.Name, titleLevel+1,
- fmt.Sprintf("", key, report.Name,
- strings.TrimSuffix(report.ProfilesPath[key], ".pprof")))
+ fmt.Sprintf("![%s.%s](%s.png)", key, report.Name, strings.TrimSuffix(heapPprofPath, ".pprof")))
}
}
@@ -194,21 +164,16 @@ func writeCollapsibleSection(writer io.Writer, title string, content []byte) {
`, title, summary)
}
-func writeTableHeader(table *tabwriter.Writer, headers []tableHeader) {
- writeTableRow(table, headers, func(_ int, h tableHeader) string {
- if len(h.unit) > 0 {
- return fmt.Sprintf("%s (%s)", h.name, h.unit)
- }
- return h.name
- })
+func writeTableHeader(table *tabwriter.Writer, headers []string) {
+ writeTableRow(table, headers)
writeTableDelimiter(table, len(headers))
}
-// writeTableRow writes one row in Markdown table style according to headers.
-func writeTableRow(table *tabwriter.Writer, headers []tableHeader, on func(int, tableHeader) string) {
+// writeTableRow writes one row in Markdown table style.
+func writeTableRow(table *tabwriter.Writer, data []string) {
row := "|"
- for i, v := range headers {
- row += on(i, v) + "\t"
+ for _, v := range data {
+ row += v + "\t"
}
_, _ = fmt.Fprintln(table, row)
@@ -223,3 +188,40 @@ func writeTableDelimiter(table *tabwriter.Writer, n int) {
_, _ = fmt.Fprintln(table, sep)
}
+
+func getSamplesMinMaxMeans(samples []BenchmarkMetricSample) []string {
+ cpMem := make([]float64, 0, len(samples))
+ cpCPU := make([]float64, 0, len(samples))
+ dpMem := make([]float64, 0, len(samples))
+ dpCPU := make([]float64, 0, len(samples))
+ for _, sample := range samples {
+ cpMem = append(cpMem, sample.ControlPlaneMem)
+ cpCPU = append(cpCPU, sample.ControlPlaneCPU)
+ dpMem = append(dpMem, sample.DataPlaneMem)
+ dpCPU = append(dpCPU, sample.DataPlaneCPU)
+ }
+
+ return []string{
+ getMetricsMinMaxMeans(cpMem),
+ getMetricsMinMaxMeans(cpCPU),
+ getMetricsMinMaxMeans(dpMem),
+ getMetricsMinMaxMeans(dpCPU),
+ }
+}
+
+func getMetricsMinMaxMeans(metrics []float64) string {
+ var min, max, avg float64 = math.MaxFloat64, 0, 0
+ for _, v := range metrics {
+ min = math.Min(v, min)
+ max = math.Max(v, max)
+ avg += v
+ }
+ if min == math.MaxFloat64 {
+ min = 0
+ }
+ if len(metrics) > 0 {
+ avg /= float64(len(metrics))
+ }
+
+ return fmt.Sprintf("%.2f / %.2f / %.2f", min, max, avg)
+}
diff --git a/test/benchmark/suite/report.go b/test/benchmark/suite/report.go
index b159e79860..1ff4750d44 100644
--- a/test/benchmark/suite/report.go
+++ b/test/benchmark/suite/report.go
@@ -10,12 +10,9 @@ package suite
import (
"bytes"
"context"
+ "errors"
"fmt"
"io"
- "os"
- "path"
- "strconv"
- "strings"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -26,46 +23,57 @@ import (
prom "github.com/envoyproxy/gateway/test/utils/prometheus"
)
+const (
+ controlPlaneMemQL = `process_resident_memory_bytes{namespace="envoy-gateway-system", control_plane="envoy-gateway"}/1024/1024`
+ controlPlaneCPUQL = `rate(process_cpu_seconds_total{namespace="envoy-gateway-system", control_plane="envoy-gateway"}[30s])*100`
+ dataPlaneMemQL = `container_memory_working_set_bytes{namespace="envoy-gateway-system", container="envoy"}/1024/1024`
+ dataPlaneCPUQL = `rate(container_cpu_usage_seconds_total{namespace="envoy-gateway-system", container="envoy"}[30s])*100`
+)
+
+// BenchmarkMetricSample contains sampled metrics and profiles data.
+type BenchmarkMetricSample struct {
+ ControlPlaneMem float64
+ ControlPlaneCPU float64
+ DataPlaneMem float64
+ DataPlaneCPU float64
+
+ HeapProfile []byte
+}
+
type BenchmarkReport struct {
Name string
- Result []byte
- Metrics map[string]float64 // metricTableHeaderName:metricValue
- ProfilesPath map[string]string // profileKey:profileFilepath
ProfilesOutputDir string
+ // Nighthawk benchmark result
+ Result []byte
+ // Prometheus metrics and pprof profiles sampled data
+ Samples []BenchmarkMetricSample
kubeClient kube.CLIClient
promClient *prom.Client
}
-func NewBenchmarkReport(name, profilesOutputDir string, kubeClient kube.CLIClient, promClient *prom.Client) (*BenchmarkReport, error) {
- if err := createDirIfNotExist(profilesOutputDir); err != nil {
- return nil, err
- }
-
+func NewBenchmarkReport(name, profilesOutputDir string, kubeClient kube.CLIClient, promClient *prom.Client) *BenchmarkReport {
return &BenchmarkReport{
Name: name,
- Metrics: make(map[string]float64),
- ProfilesPath: make(map[string]string),
ProfilesOutputDir: profilesOutputDir,
kubeClient: kubeClient,
promClient: promClient,
- }, nil
+ }
}
-func (r *BenchmarkReport) Collect(ctx context.Context, job *types.NamespacedName) error {
- if err := r.GetProfiles(ctx); err != nil {
- return err
- }
+func (r *BenchmarkReport) Sample(ctx context.Context) (err error) {
+ sample := BenchmarkMetricSample{}
- if err := r.GetMetrics(ctx); err != nil {
- return err
+ if mErr := r.sampleMetrics(ctx, &sample); mErr != nil {
+ err = errors.Join(err, mErr)
}
- if err := r.GetResult(ctx, job); err != nil {
- return err
+ if pErr := r.sampleProfiles(ctx, &sample); pErr != nil {
+ err = errors.Join(err, pErr)
}
- return nil
+ r.Samples = append(r.Samples, sample)
+ return err
}
func (r *BenchmarkReport) GetResult(ctx context.Context, job *types.NamespacedName) error {
@@ -97,34 +105,34 @@ func (r *BenchmarkReport) GetResult(ctx context.Context, job *types.NamespacedNa
return nil
}
-func (r *BenchmarkReport) GetMetrics(ctx context.Context) error {
- for _, h := range metricsTableHeader {
- if len(h.promQL) == 0 {
- continue
- }
-
- var (
- v float64
- err error
- )
- switch h.queryType {
- case querySum:
- v, err = r.promClient.QuerySum(ctx, h.promQL)
- case queryAvg:
- v, err = r.promClient.QueryAvg(ctx, h.promQL)
- default:
- return fmt.Errorf("unsupported query type: %s", h.queryType)
- }
-
- if err == nil {
- r.Metrics[h.name], _ = strconv.ParseFloat(fmt.Sprintf("%.2f", v), 64)
- }
+func (r *BenchmarkReport) sampleMetrics(ctx context.Context, sample *BenchmarkMetricSample) (err error) {
+ // Sample memory
+ cpMem, qErr := r.promClient.QuerySum(ctx, controlPlaneMemQL)
+ if qErr != nil {
+ err = errors.Join(err, fmt.Errorf("failed to query control plane memory: %w", qErr))
+ }
+ dpMem, qErr := r.promClient.QueryAvg(ctx, dataPlaneMemQL)
+ if qErr != nil {
+ err = errors.Join(err, fmt.Errorf("failed to query data plane memory: %w", qErr))
+ }
+ // Sample cpu
+ cpCPU, qErr := r.promClient.QuerySum(ctx, controlPlaneCPUQL)
+ if qErr != nil {
+ err = errors.Join(err, fmt.Errorf("failed to query control plane cpu: %w", qErr))
+ }
+ dpCPU, qErr := r.promClient.QueryAvg(ctx, dataPlaneCPUQL)
+ if qErr != nil {
+ err = errors.Join(err, fmt.Errorf("failed to query data plane cpu: %w", qErr))
}
- return nil
+ sample.ControlPlaneMem = cpMem
+ sample.ControlPlaneCPU = cpCPU
+ sample.DataPlaneMem = dpMem
+ sample.DataPlaneCPU = dpCPU
+ return err
}
-func (r *BenchmarkReport) GetProfiles(ctx context.Context) error {
+func (r *BenchmarkReport) sampleProfiles(ctx context.Context, sample *BenchmarkMetricSample) error {
egPod, err := r.fetchEnvoyGatewayPod(ctx)
if err != nil {
return err
@@ -138,16 +146,7 @@ func (r *BenchmarkReport) GetProfiles(ctx context.Context) error {
return err
}
- heapProfPath := path.Join(r.ProfilesOutputDir, fmt.Sprintf("heap.%s.pprof", r.Name))
- if err = os.WriteFile(heapProfPath, heapProf, 0o600); err != nil {
- return fmt.Errorf("failed to write profiles %s: %w", heapProfPath, err)
- }
-
- // Remove parent output report dir.
- splits := strings.SplitN(heapProfPath, "/", 2)[0]
- heapProfPath = strings.TrimPrefix(heapProfPath, splits+"/")
- r.ProfilesPath["heap"] = heapProfPath
-
+ sample.HeapProfile = heapProf
return nil
}
diff --git a/test/benchmark/suite/suite.go b/test/benchmark/suite/suite.go
index 80a6ff1daf..e8a045bec1 100644
--- a/test/benchmark/suite/suite.go
+++ b/test/benchmark/suite/suite.go
@@ -32,9 +32,10 @@ import (
)
const (
- BenchmarkTestScaledKey = "benchmark-test/scaled"
- BenchmarkTestClientKey = "benchmark-test/client"
- DefaultControllerName = "gateway.envoyproxy.io/gatewayclass-controller"
+ BenchmarkTestScaledKey = "benchmark-test/scaled"
+ BenchmarkTestClientKey = "benchmark-test/client"
+ BenchmarkMetricsSampleTick = 3 * time.Second
+ DefaultControllerName = "gateway.envoyproxy.io/gatewayclass-controller"
)
type BenchmarkTest struct {
@@ -200,8 +201,14 @@ func (b *BenchmarkTestSuite) Benchmark(t *testing.T, ctx context.Context, jobNam
return nil, err
}
+ profilesOutputDir := path.Join(b.ReportSaveDir, "profiles")
+ if err := createDirIfNotExist(profilesOutputDir); err != nil {
+ return nil, err
+ }
+
// Wait from benchmark test job to complete.
- if err = wait.PollUntilContextTimeout(ctx, 6*time.Second, time.Duration(duration*10)*time.Second, true, func(ctx context.Context) (bool, error) {
+ report := NewBenchmarkReport(resultTitle, profilesOutputDir, b.kubeClient, b.promClient)
+ if err = wait.PollUntilContextTimeout(ctx, BenchmarkMetricsSampleTick, time.Duration(duration*10)*time.Second, true, func(ctx context.Context) (bool, error) {
job := new(batchv1.Job)
if err = b.Client.Get(ctx, *jobNN, job); err != nil {
return false, err
@@ -221,6 +228,12 @@ func (b *BenchmarkTestSuite) Benchmark(t *testing.T, ctx context.Context, jobNam
t.Logf("Job %s still not complete", jobName)
+ // Sample the metrics and profiles at runtime.
+ // Do not consider it as an error, fail sampling should not affect test running.
+ if err := report.Sample(ctx); err != nil {
+ t.Logf("Error occurs while sampling metrics or profiles: %v", err)
+ }
+
return false, nil
}); err != nil {
t.Errorf("Failed to run benchmark test: %v", err)
@@ -230,13 +243,8 @@ func (b *BenchmarkTestSuite) Benchmark(t *testing.T, ctx context.Context, jobNam
t.Logf("Running benchmark test: %s successfully", resultTitle)
- report, err := NewBenchmarkReport(resultTitle, path.Join(b.ReportSaveDir, "profiles"), b.kubeClient, b.promClient)
- if err != nil {
- return nil, fmt.Errorf("failed to create benchmark report: %w", err)
- }
-
- // Get all the reports from this benchmark test run.
- if err = report.Collect(ctx, jobNN); err != nil {
+ // Get nighthawk result from this benchmark test run.
+ if err = report.GetResult(ctx, jobNN); err != nil {
return nil, err
}