From f8a9d479e20319e42aba02798cca76791d745ad4 Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 2 May 2024 15:50:36 -0400 Subject: [PATCH 01/21] Automate scale test Problem: Non-functional scale test needs to be run manually. Solution: - Automate scale test. - Use in-cluster Prometheus to collect CPU, memory and NGF metrics. - Use Kubernetes API server to get NGF logs. For development and troubleshooting, it is possible to run scale test locally in Kind cluster. However, it is necessary to bring down the number of HTTPRoutes to 50 or less (roughly). Testing: - Ran this test locally with 64 listeners, 50 routes and 50 upstreams with NGINX OSS. - Ran this test on GKE with the default configuration with NGINX OSS. Out of scope: ensuring this test runs successfully via GitHub pipeline. Closes https://github.com/nginxinc/nginx-gateway-fabric/issues/1368 Largely based on work by Ciara in https://github.com/nginxinc/nginx-gateway-fabric/pull/1804 Co-authored-by: Ciara Stacke --- .../generate_manifests.go | 183 ++-- tests/framework/portforward.go | 46 +- tests/framework/prometheus.go | 292 +++++++ tests/framework/resourcemanager.go | 107 ++- tests/framework/results.go | 63 +- tests/framework/timeout.go | 4 + tests/scale/manifests/prom-clusterrole.yaml | 44 - tests/scale/scale_test.go | 259 ------ tests/scripts/cpu-plot.gp | 20 + tests/scripts/create-gke-cluster.sh | 14 +- tests/scripts/memory-plot.gp | 21 + tests/scripts/ttr-plot.gp | 18 + tests/scripts/vars.env-example | 2 + tests/suite/dataplane_perf_test.go | 2 +- .../manifests/scale/matches.yaml} | 0 .../manifests/scale/upstreams.yaml} | 0 tests/suite/scale_test.go | 817 ++++++++++++++++++ tests/suite/system_suite_test.go | 41 +- tests/suite/upgrade_test.go | 9 +- 19 files changed, 1514 insertions(+), 428 deletions(-) rename tests/{scale => framework}/generate_manifests.go (66%) create mode 100644 tests/framework/prometheus.go delete mode 100644 tests/scale/manifests/prom-clusterrole.yaml delete mode 100644 tests/scale/scale_test.go create mode 100644 tests/scripts/cpu-plot.gp create mode 100644 tests/scripts/memory-plot.gp create mode 100644 tests/scripts/ttr-plot.gp rename tests/{scale/manifests/scale-matches.yaml => suite/manifests/scale/matches.yaml} (100%) rename tests/{scale/manifests/scale-upstreams.yaml => suite/manifests/scale/upstreams.yaml} (100%) create mode 100644 tests/suite/scale_test.go diff --git a/tests/scale/generate_manifests.go b/tests/framework/generate_manifests.go similarity index 66% rename from tests/scale/generate_manifests.go rename to tests/framework/generate_manifests.go index 88f752141..46c7b92b3 100644 --- a/tests/scale/generate_manifests.go +++ b/tests/framework/generate_manifests.go @@ -1,17 +1,18 @@ -//go:build scale -// +build scale - -package scale +package framework import ( "bytes" + "errors" "fmt" - "os" - "path/filepath" + "io" "text/template" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/util/yaml" + "sigs.k8s.io/controller-runtime/pkg/client" ) -var gwTmplTxt = `apiVersion: gateway.networking.k8s.io/v1 +const gwTmplTxt = `apiVersion: gateway.networking.k8s.io/v1 kind: Gateway metadata: name: gateway @@ -33,7 +34,7 @@ spec: {{- end -}} {{- end -}}` -var hrTmplTxt = `apiVersion: gateway.networking.k8s.io/v1 +const hrTmplTxt = `apiVersion: gateway.networking.k8s.io/v1 kind: HTTPRoute metadata: name: {{ .Name }} @@ -53,7 +54,7 @@ spec: port: 80` // nolint:all -var secretTmplTxt = `apiVersion: v1 +const secretTmplTxt = `apiVersion: v1 kind: Secret metadata: 
name: {{ . }} @@ -63,8 +64,7 @@ data: tls.key: LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCk1JSUV2UUlCQURBTkJna3Foa2lHOXcwQkFRRUZBQVNDQktjd2dnU2pBZ0VBQW9JQkFRQzZtTnJSdUZ2WXZoSE4KbXI3c1FvNUtKSUVDN3N6TFVrNExFeklSNS9yMEVaUjQ2RnRTaGJQd0ZuaXAwMFBxekhpVkhKYy92TjdkQTVLeApQS1VmdFJuQ1J6YldVaTZBZzJpRU93bXF6WUhGbVNpZkFlVjk0RlAxOGtSbjl1ckV3OEpiRXJIUncrVW51L25tCmFMRHF1eGpFTVBweGhuRklCSnYwK1R3djNEVGx6TjNwUlV6dnpidGZvZCtEVTZBSmR6N3Rid1dTNmR6MHc1Z2kKbW9RelZnbFpnVDBJek9FZkV3NVpWMnRMZllHZWRlRVJ1VjhtR041c09va3R2aGxsMU1udHRaMkZNVHgySmVjUQo3K0xBRm9YVnBTS2NjbUFVZ1JBM0xOOHdVZXBVTHZZdFhiUm1QTFc4SjFINmhFeHJHTHBiTERZNmpzbGxBNlZpCk0xMjVjU0hsQWdNQkFBRUNnZ0VBQnpaRE50bmVTdWxGdk9HZlFYaHRFWGFKdWZoSzJBenRVVVpEcUNlRUxvekQKWlV6dHdxbkNRNlJLczUyandWNTN4cU9kUU94bTNMbjNvSHdNa2NZcEliWW82MjJ2dUczYnkwaVEzaFlsVHVMVgpqQmZCcS9UUXFlL2NMdngvSkczQWhFNmJxdFRjZFlXeGFmTmY2eUtpR1dzZk11WVVXTWs4MGVJVUxuRmZaZ1pOCklYNTlSOHlqdE9CVm9Sa3hjYTVoMW1ZTDFsSlJNM3ZqVHNHTHFybmpOTjNBdWZ3ZGRpK1VDbGZVL2l0K1EvZkUKV216aFFoTlRpNVFkRWJLVStOTnYvNnYvb2JvandNb25HVVBCdEFTUE05cmxFemIralQ1WHdWQjgvLzRGY3VoSwoyVzNpcjhtNHVlQ1JHSVlrbGxlLzhuQmZ0eVhiVkNocVRyZFBlaGlPM1FLQmdRRGlrR3JTOTc3cjg3Y1JPOCtQClpoeXltNXo4NVIzTHVVbFNTazJiOTI1QlhvakpZL2RRZDVTdFVsSWE4OUZKZnNWc1JRcEhHaTFCYzBMaTY1YjIKazR0cE5xcVFoUmZ1UVh0UG9GYXRuQzlPRnJVTXJXbDVJN0ZFejZnNkNQMVBXMEg5d2hPemFKZUdpZVpNYjlYTQoybDdSSFZOcC9jTDlYbmhNMnN0Q1lua2Iwd0tCZ1FEUzF4K0crakEyUVNtRVFWNXA1RnRONGcyamsyZEFjMEhNClRIQ2tTazFDRjhkR0Z2UWtsWm5ZbUt0dXFYeXNtekJGcnZKdmt2eUhqbUNYYTducXlpajBEdDZtODViN3BGcVAKQWxtajdtbXI3Z1pUeG1ZMXBhRWFLMXY4SDNINGtRNVl3MWdrTWRybVJHcVAvaTBGaDVpaGtSZS9DOUtGTFVkSQpDcnJjTzhkUVp3S0JnSHA1MzRXVWNCMVZibzFlYStIMUxXWlFRUmxsTWlwRFM2TzBqeWZWSmtFb1BZSEJESnp2ClIrdzZLREJ4eFoyWmJsZ05LblV0YlhHSVFZd3lGelhNcFB5SGxNVHpiZkJhYmJLcDFyR2JVT2RCMXpXM09PRkgKcmppb21TUm1YNmxhaDk0SjRHU0lFZ0drNGw1SHhxZ3JGRDZ2UDd4NGRjUktJWFpLZ0w2dVJSSUpBb0dCQU1CVApaL2p5WStRNTBLdEtEZHUrYU9ORW4zaGxUN3hrNXRKN3NBek5rbWdGMU10RXlQUk9Xd1pQVGFJbWpRbk9qbHdpCldCZ2JGcXg0M2ZlQ1Z4ZXJ6V3ZEM0txaWJVbWpCTkNMTGtYeGh3ZEVteFQwVit2NzZGYzgwaTNNYVdSNnZZR08KditwVVovL0F6UXdJcWZ6dlVmV2ZxdStrMHlhVXhQOGNlcFBIRyt0bEFvR0FmQUtVVWhqeFU0Ym5vVzVwVUhKegpwWWZXZXZ5TW54NWZyT2VsSmRmNzlvNGMvMHhVSjh1eFBFWDFkRmNrZW96dHNpaVFTNkN6MENRY09XVWxtSkRwCnVrdERvVzM3VmNSQU1BVjY3NlgxQVZlM0UwNm5aL2g2Tkd4Z28rT042Q3pwL0lkMkJPUm9IMFAxa2RjY1NLT3kKMUtFZlNnb1B0c1N1eEpBZXdUZmxDMXc9Ci0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K ` -var appTmplTxt = `apiVersion: v1 -apiVersion: apps/v1 +const appTmplTxt = `apiVersion: apps/v1 kind: Deployment metadata: name: {{ . }} @@ -105,25 +105,55 @@ var ( appTmpl = template.Must(template.New("app").Parse(appTmplTxt)) ) -type Listener struct { +type listener struct { Name string HostnamePrefix string SecretName string } -type Route struct { +type route struct { Name string ListenerName string HostnamePrefix string BackendName string } -func getPrereqDirName(manifestDir string) string { - return filepath.Join(manifestDir, "prereqs") +// ScaleObjects contains objects for scale testing. +type ScaleObjects struct { + // BaseObjects contains objects that are common to all scale iterations. + BaseObjects []client.Object + // ScaleIterationGroups contains objects for each scale iteration. 
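+	// For the listener tests, for example, group i holds the Gateway regenerated with i+1 listeners
+	// plus the HTTPRoute attached to the newest listener, so applying the groups in order grows the
+	// configuration one step at a time (illustrative description of how the generators below populate it).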
+ ScaleIterationGroups [][]client.Object } -func generateScaleListenerManifests(numListeners int, manifestDir string, tls bool) error { - listeners := make([]Listener, 0) +func decodeObjects(reader io.Reader) ([]client.Object, error) { + var objects []client.Object + + decoder := yaml.NewYAMLOrJSONDecoder(reader, 4096) + for { + obj := unstructured.Unstructured{} + if err := decoder.Decode(&obj); err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, fmt.Errorf("error decoding resource: %w", err) + } + + if len(obj.Object) == 0 { + continue + } + + objects = append(objects, &obj) + } + + return objects, nil +} + +// GenerateScaleListenerObjects generates objects for a given number of listeners for the scale test. +func GenerateScaleListenerObjects(numListeners int, tls bool) (ScaleObjects, error) { + var result ScaleObjects + + listeners := make([]listener, 0) backends := make([]string, 0) secrets := make([]string, 0) @@ -138,13 +168,13 @@ func generateScaleListenerManifests(numListeners int, manifestDir string, tls bo secrets = append(secrets, secretName) } - listeners = append(listeners, Listener{ + listeners = append(listeners, listener{ Name: listenerName, HostnamePrefix: hostnamePrefix, SecretName: secretName, }) - route := Route{ + r := route{ Name: fmt.Sprintf("route-%d", i), ListenerName: listenerName, HostnamePrefix: hostnamePrefix, @@ -153,44 +183,57 @@ func generateScaleListenerManifests(numListeners int, manifestDir string, tls bo backends = append(backends, backendName) - if err := generateManifests(manifestDir, i, listeners, []Route{route}); err != nil { - return err + objects, err := generateManifests(listeners, []route{r}) + if err != nil { + return ScaleObjects{}, err } + + result.ScaleIterationGroups = append(result.ScaleIterationGroups, objects) } - if err := generateSecrets(getPrereqDirName(manifestDir), secrets); err != nil { - return err + secretObjects, err := generateSecrets(secrets) + if err != nil { + return ScaleObjects{}, err } - return generateBackendAppManifests(getPrereqDirName(manifestDir), backends) -} + result.BaseObjects = append(result.BaseObjects, secretObjects...) -func generateSecrets(secretsDir string, secrets []string) error { - err := os.Mkdir(secretsDir, 0o750) - if err != nil && !os.IsExist(err) { - return err + backendObjects, err := generateBackendAppObjects(backends) + if err != nil { + return ScaleObjects{}, err } + result.BaseObjects = append(result.BaseObjects, backendObjects...) + + return result, nil +} + +func generateSecrets(secrets []string) ([]client.Object, error) { + objects := make([]client.Object, 0, len(secrets)) + for _, secret := range secrets { var buf bytes.Buffer - if err = secretTmpl.Execute(&buf, secret); err != nil { - return err + if err := secretTmpl.Execute(&buf, secret); err != nil { + return nil, err } - path := filepath.Join(secretsDir, fmt.Sprintf("%s.yaml", secret)) - - fmt.Println("Writing", path) - if err := os.WriteFile(path, buf.Bytes(), 0o600); err != nil { - return err + objs, err := decodeObjects(&buf) + if err != nil { + return nil, err } + + objects = append(objects, objs...) } - return nil + return objects, nil } -func generateScaleHTTPRouteManifests(numRoutes int, manifestDir string) error { - l := Listener{ +// GenerateScaleHTTPRouteObjects generates objects for a given number of routes for the scale test. 
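+//
+// Illustrative usage (a sketch of how the scale test consumes the result; error handling elided):
+//
+//	objects, _ := GenerateScaleHTTPRouteObjects(1000)
+//	// apply objects.BaseObjects once, then apply each objects.ScaleIterationGroups[i] in turn,
+//	// measuring time-to-ready after every step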
+func GenerateScaleHTTPRouteObjects(numRoutes int) (ScaleObjects, error) { + var result ScaleObjects + + l := listener{ Name: "listener", HostnamePrefix: "*", } @@ -198,35 +241,43 @@ func generateScaleHTTPRouteManifests(numRoutes int, manifestDir string) error { backendName := "backend" for i := 0; i < numRoutes; i++ { - - route := Route{ + r := route{ Name: fmt.Sprintf("route-%d", i), HostnamePrefix: fmt.Sprintf("%d", i), ListenerName: "listener", BackendName: backendName, } - var listeners []Listener + var listeners []listener if i == 0 { // only generate a Gateway on the first iteration - listeners = []Listener{l} + listeners = []listener{l} } - if err := generateManifests(manifestDir, i, listeners, []Route{route}); err != nil { - return err + objects, err := generateManifests(listeners, []route{r}) + if err != nil { + return ScaleObjects{}, err } + result.ScaleIterationGroups = append(result.ScaleIterationGroups, objects) + } + + backendObjects, err := generateBackendAppObjects([]string{backendName}) + if err != nil { + return ScaleObjects{}, err } - return generateBackendAppManifests(getPrereqDirName(manifestDir), []string{backendName}) + result.BaseObjects = backendObjects + + return result, nil } -func generateManifests(outDir string, version int, listeners []Listener, routes []Route) error { +func generateManifests(listeners []listener, routes []route) ([]client.Object, error) { var buf bytes.Buffer if len(listeners) > 0 { if err := gwTmpl.Execute(&buf, listeners); err != nil { - return err + return nil, err } } @@ -236,42 +287,30 @@ func generateManifests(outDir string, version int, listeners []Listener, routes } if err := hrTmpl.Execute(&buf, r); err != nil { - return err + return nil, err } } - err := os.Mkdir(outDir, 0o750) - if err != nil && !os.IsExist(err) { - return err - } - - filename := fmt.Sprintf("manifest-%d.yaml", version) - path := filepath.Join(outDir, filename) - - fmt.Println("Writing", path) - return os.WriteFile(path, buf.Bytes(), 0o600) + return decodeObjects(&buf) } -func generateBackendAppManifests(outDir string, backends []string) error { - err := os.Mkdir(outDir, 0o750) - if err != nil && !os.IsExist(err) { - return err - } +func generateBackendAppObjects(backends []string) ([]client.Object, error) { + objects := make([]client.Object, 0, 2*len(backends)) for _, backend := range backends { var buf bytes.Buffer - if err = appTmpl.Execute(&buf, backend); err != nil { - return err + if err := appTmpl.Execute(&buf, backend); err != nil { + return nil, err } - path := filepath.Join(outDir, fmt.Sprintf("%s.yaml", backend)) - - fmt.Println("Writing", path) - if err := os.WriteFile(path, buf.Bytes(), 0o600); err != nil { - return err + objs, err := decodeObjects(&buf) + if err != nil { + return nil, err } + + objects = append(objects, objs...) } - return nil + return objects, nil } diff --git a/tests/framework/portforward.go b/tests/framework/portforward.go index 1efc16be3..e8234fea7 100644 --- a/tests/framework/portforward.go +++ b/tests/framework/portforward.go @@ -6,22 +6,25 @@ import ( "net/http" "net/url" "path" + "time" + + "log/slog" "k8s.io/client-go/rest" "k8s.io/client-go/tools/portforward" "k8s.io/client-go/transport/spdy" ) -// PortForward starts a port-forward to the specified Pod and returns the local port being forwarded. -func PortForward(config *rest.Config, namespace, podName string, stopCh chan struct{}) (int, error) { +// PortForward starts a port-forward to the specified Pod. 
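+// It returns once the forwarding goroutine is started and keeps retrying in the background if the
+// connection drops, until stopCh is closed.
+//
+// Example (a sketch; the pod name and port mapping are placeholders):
+//
+//	stopCh := make(chan struct{})
+//	defer close(stopCh)
+//	if err := PortForward(config, "nginx-gateway", "ngf-pod", []string{"8080:80"}, stopCh); err != nil {
+//		return err
+//	}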
+func PortForward(config *rest.Config, namespace, podName string, ports []string, stopCh <-chan struct{}) error { roundTripper, upgrader, err := spdy.RoundTripperFor(config) if err != nil { - return 0, fmt.Errorf("error creating roundtripper: %w", err) + return fmt.Errorf("error creating roundtripper: %w", err) } serverURL, err := url.Parse(config.Host) if err != nil { - return 0, fmt.Errorf("error parsing rest config host: %w", err) + return fmt.Errorf("error parsing rest config host: %w", err) } serverURL.Path = path.Join( @@ -33,25 +36,34 @@ func PortForward(config *rest.Config, namespace, podName string, stopCh chan str dialer := spdy.NewDialer(upgrader, &http.Client{Transport: roundTripper}, http.MethodPost, serverURL) - readyCh := make(chan struct{}, 1) out, errOut := new(bytes.Buffer), new(bytes.Buffer) - forwarder, err := portforward.New(dialer, []string{":80"}, stopCh, readyCh, out, errOut) - if err != nil { - return 0, fmt.Errorf("error creating port forwarder: %w", err) + forward := func() error { + readyCh := make(chan struct{}, 1) + + forwarder, err := portforward.New(dialer, ports, stopCh, readyCh, out, errOut) + if err != nil { + return fmt.Errorf("error creating port forwarder: %w", err) + } + + return forwarder.ForwardPorts() } go func() { - if err := forwarder.ForwardPorts(); err != nil { - panic(err) + for { + if err := forward(); err != nil { + slog.Error("error forwarding ports", "error", err) + slog.Info("retrying port forward in 100ms...") + } + + select { + case <-stopCh: + return + case <-time.After(100 * time.Millisecond): + // retrying + } } }() - <-readyCh - ports, err := forwarder.GetPorts() - if err != nil { - return 0, fmt.Errorf("error getting ports being forwarded: %w", err) - } - - return int(ports[0].Local), nil + return nil } diff --git a/tests/framework/prometheus.go b/tests/framework/prometheus.go new file mode 100644 index 000000000..eefca29fe --- /dev/null +++ b/tests/framework/prometheus.go @@ -0,0 +1,292 @@ +package framework + +import ( + "context" + "encoding/csv" + "errors" + "fmt" + "log/slog" + "os" + "os/exec" + "time" + + "github.com/prometheus/client_golang/api" + v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/common/model" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + prometheusNamespace = "prom" + prometheusReleaseName = "prom" +) + +var defaultPrometheusQueryTimeout = 2 * time.Second + +// PrometheusConfig is the configuration for installing Prometheus +type PrometheusConfig struct { + // ScrapeInterval is the interval at which Prometheus scrapes metrics. + ScrapeInterval time.Duration + // QueryTimeout is the timeout for Prometheus queries. + // Default is 2s. + QueryTimeout time.Duration +} + +// InstallPrometheus installs Prometheus in the cluster. +// It waits for Prometheus pods to be ready before returning. 
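+//
+// A typical call from a test suite (sketch; the 15s scrape interval mirrors what the scale test passes):
+//
+//	promInstance, err := InstallPrometheus(ctx, resourceManager, PrometheusConfig{
+//		ScrapeInterval: 15 * time.Second,
+//	})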
+func InstallPrometheus( + ctx context.Context, + rm ResourceManager, + cfg PrometheusConfig, +) (PrometheusInstance, error) { + output, err := exec.Command( + "helm", + "repo", + "add", + "prometheus-community", + "https://prometheus-community.github.io/helm-charts", + ).CombinedOutput() + if err != nil { + return PrometheusInstance{}, fmt.Errorf("failed to add Prometheus helm repo: %w; output: %s", err, string(output)) + } + + output, err = exec.Command( + "helm", + "repo", + "update", + ).CombinedOutput() + if err != nil { + return PrometheusInstance{}, fmt.Errorf("failed to update helm repos: %w; output: %s", err, string(output)) + } + + scrapeInterval := fmt.Sprintf("%ds", int(cfg.ScrapeInterval.Seconds())) + + output, err = exec.Command( + "helm", + "install", + prometheusReleaseName, + "prometheus-community/prometheus", + "--create-namespace", + "--namespace", prometheusNamespace, + "--set", fmt.Sprintf("server.global.scrape_interval=%s", scrapeInterval), + "--wait", + ).CombinedOutput() + if err != nil { + return PrometheusInstance{}, fmt.Errorf("failed to install Prometheus: %w; output: %s", err, string(output)) + } + + pods, err := rm.GetPods(prometheusNamespace, client.MatchingLabels{ + "app.kubernetes.io/name": "prometheus", + }) + if err != nil { + return PrometheusInstance{}, fmt.Errorf("failed to get Prometheus pods: %w", err) + } + + if len(pods) == 0 { + return PrometheusInstance{}, errors.New("no Prometheus pods found") + } + + if len(pods) > 1 { + return PrometheusInstance{}, errors.New("multiple Prometheus pods found, expected one") + } + + pod := pods[0] + + if pod.Status.PodIP == "" { + return PrometheusInstance{}, errors.New("Prometheus pod has no IP") + } + + var queryTimeout time.Duration + if cfg.QueryTimeout == 0 { + queryTimeout = defaultPrometheusQueryTimeout + } else { + queryTimeout = cfg.QueryTimeout + } + + return PrometheusInstance{ + podIP: pod.Status.PodIP, + podName: pod.Name, + podNamespace: pod.Namespace, + queryTimeout: queryTimeout, + }, nil +} + +// UninstallPrometheus uninstalls Prometheus from the cluster. +func UninstallPrometheus() error { + output, err := exec.Command( + "helm", + "uninstall", + prometheusReleaseName, + "-n", prometheusNamespace, + ).CombinedOutput() + if err != nil { + return fmt.Errorf("failed to uninstall Prometheus: %w; output: %s", err, string(output)) + } + + return nil +} + +const ( + // PrometheusPortForwardPort is the local port that will forward to the Prometheus API. + PrometheusPortForwardPort = 9090 + prometheusAPIPort = 9090 +) + +// PrometheusInstance represents a Prometheus instance in the cluster. +type PrometheusInstance struct { + podIP string + podName string + podNamespace string + portForward bool + queryTimeout time.Duration + + apiClient v1.API +} + +// PortForward starts port forwarding to the Prometheus instance. 
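+// It is only needed when the test runner cannot reach the Pod IP directly (for example, against a
+// local kind cluster); on GKE the runner VM reaches the Pod IP without a port-forward, which is why
+// the scale test starts it only for non-GKE clusters.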
+func (ins *PrometheusInstance) PortForward(config *rest.Config, stopCh <-chan struct{}) error { + if ins.portForward { + panic("port forwarding already started") + } + + ins.portForward = true + + ports := []string{fmt.Sprintf("%d:%d", PrometheusPortForwardPort, prometheusAPIPort)} + return PortForward(config, ins.podNamespace, ins.podName, ports, stopCh) +} + +func (ins *PrometheusInstance) getAPIClient() (v1.API, error) { + var endpoint string + if ins.portForward { + endpoint = fmt.Sprintf("http://localhost:%d", PrometheusPortForwardPort) + } else { + // on GKE, test runner VM can access the pod directly + endpoint = fmt.Sprintf("http://%s:%d", ins.podIP, prometheusAPIPort) + } + + cfg := api.Config{ + Address: fmt.Sprintf("%s", endpoint), + } + + c, err := api.NewClient(cfg) + if err != nil { + return nil, err + } + + return v1.NewAPI(c), nil +} + +func (ins *PrometheusInstance) ensureAPIClient() error { + if ins.apiClient == nil { + api, err := ins.getAPIClient() + if err != nil { + return fmt.Errorf("failed to get Prometheus API client: %w", err) + } + ins.apiClient = api + } + + return nil +} + +// Query sends a query to Prometheus. +func (ins *PrometheusInstance) Query(query string) (model.Value, error) { + ctx, cancel := context.WithTimeout(context.Background(), ins.queryTimeout) + defer cancel() + + return ins.QueryWithCtx(ctx, query) +} + +// QueryWithCtx sends a query to Prometheus with the specified context. +func (ins *PrometheusInstance) QueryWithCtx(ctx context.Context, query string) (model.Value, error) { + if err := ins.ensureAPIClient(); err != nil { + return nil, err + } + + result, warnings, err := ins.apiClient.Query(ctx, query, time.Time{}) + if err != nil { + return nil, fmt.Errorf("failed to query Prometheus: %w", err) + } + + if len(warnings) > 0 { + slog.Info("Prometheus query returned warnings", + "query", query, + "warnings", warnings, + ) + } + + return result, nil +} + +// QueryRange sends a range query to Prometheus. +func (ins *PrometheusInstance) QueryRange(query string, promRange v1.Range) (model.Value, error) { + ctx, cancel := context.WithTimeout(context.Background(), ins.queryTimeout) + defer cancel() + + return ins.QueryRangeWithCtx(ctx, query, promRange) +} + +// QueryRangeWithCtx sends a range query to Prometheus with the specified context. +func (ins *PrometheusInstance) QueryRangeWithCtx(ctx context.Context, query string, promRange v1.Range) (model.Value, error) { + if err := ins.ensureAPIClient(); err != nil { + return nil, err + } + + result, warnings, err := ins.apiClient.QueryRange(ctx, query, promRange) + if err != nil { + return nil, fmt.Errorf("failed to query Prometheus: %w", err) + } + + if len(warnings) > 0 { + slog.Info("Prometheus range query returned warnings", + "query", query, + "range", promRange, + "warnings", warnings, + ) + } + + return result, nil +} + +// GetFirstValueOfPrometheusVector returns the first value of a Prometheus vector. +func GetFirstValueOfPrometheusVector(val model.Value) (float64, error) { + res, ok := val.(model.Vector) + if !ok { + return 0, fmt.Errorf("expected a vector, got %T", val) + } + + if len(res) == 0 { + return 0, errors.New("empty vector") + } + + return float64(res[0].Value), nil +} + +// WritePrometheusMatrixToCSVFile writes a Prometheus matrix to a CSV file. 
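+// Each sample is written as a "<unix timestamp>,<value>" row, which is the input format the
+// cpu-plot.gp and memory-plot.gp gnuplot scripts expect.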
+func WritePrometheusMatrixToCSVFile(fileName string, value model.Value) error { + file, err := os.Create(fileName) + if err != nil { + return err + } + defer file.Close() + + csvWriter := csv.NewWriter(file) + + matrix, ok := value.(model.Matrix) + if !ok { + return fmt.Errorf("expected a matrix, got %T", value) + } + + for _, sample := range matrix { + for _, pair := range sample.Values { + record := []string{fmt.Sprint(pair.Timestamp.Unix()), pair.Value.String()} + if err := csvWriter.Write(record); err != nil { + return err + } + } + } + + csvWriter.Flush() + + return nil +} diff --git a/tests/framework/resourcemanager.go b/tests/framework/resourcemanager.go index fdc996c5d..9b5c19475 100644 --- a/tests/framework/resourcemanager.go +++ b/tests/framework/resourcemanager.go @@ -27,9 +27,12 @@ import ( "fmt" "io" "net/http" + "reflect" "strings" "time" + "k8s.io/client-go/util/retry" + apps "k8s.io/api/apps/v1" core "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -71,7 +74,17 @@ func (rm *ResourceManager) Apply(resources []client.Object) error { defer cancel() for _, resource := range resources { - if err := rm.K8sClient.Get(ctx, client.ObjectKeyFromObject(resource), resource); err != nil { + var obj client.Object + + unstructuredObj, ok := resource.(*unstructured.Unstructured) + if ok { + obj = unstructuredObj.DeepCopy() + } else { + t := reflect.TypeOf(resource).Elem() + obj = reflect.New(t).Interface().(client.Object) + } + + if err := rm.K8sClient.Get(ctx, client.ObjectKeyFromObject(resource), obj); err != nil { if !apierrors.IsNotFound(err) { return fmt.Errorf("error getting resource: %w", err) } @@ -83,7 +96,19 @@ func (rm *ResourceManager) Apply(resources []client.Object) error { continue } - if err := rm.K8sClient.Update(ctx, resource); err != nil { + // Some tests modify resources that are also modified by NGF (to update their status), so conflicts are possible + // For example, a Gateway resource. + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + if err := rm.K8sClient.Get(ctx, client.ObjectKeyFromObject(resource), obj); err != nil { + return err + } + resource.SetResourceVersion(obj.GetResourceVersion()) + if err := rm.K8sClient.Update(ctx, resource); err != nil { + return err + } + return nil + }) + if err != nil { return fmt.Errorf("error updating resource: %w", err) } } @@ -112,8 +137,19 @@ func (rm *ResourceManager) ApplyFromFiles(files []string, namespace string) erro return nil } - obj.SetResourceVersion(fetchedObj.GetResourceVersion()) - if err := rm.K8sClient.Update(ctx, &obj); err != nil { + // Some tests modify resources that are also modified by NGF (to update their status), so conflicts are possible + // For example, a Gateway resource. + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + if err := rm.K8sClient.Get(ctx, nsName, fetchedObj); err != nil { + return err + } + obj.SetResourceVersion(fetchedObj.GetResourceVersion()) + if err := rm.K8sClient.Update(ctx, &obj); err != nil { + return err + } + return nil + }) + if err != nil { return fmt.Errorf("error updating resource: %w", err) } @@ -137,7 +173,7 @@ func (rm *ResourceManager) Delete(resources []client.Object, opts ...client.Dele return nil } -// DeleteFromFile deletes Kubernetes resources defined within the provided YAML files. +// DeleteFromFiles deletes Kubernetes resources defined within the provided YAML files. 
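+// For example, the scale test calls it to remove the upstream test manifests once that test finishes.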
func (rm *ResourceManager) DeleteFromFiles(files []string, namespace string) error { handlerFunc := func(obj unstructured.Unstructured) error { obj.SetNamespace(namespace) @@ -241,7 +277,13 @@ func (rm *ResourceManager) WaitForAppsToBeReady(namespace string) error { ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.CreateTimeout) defer cancel() - if err := rm.waitForPodsToBeReady(ctx, namespace); err != nil { + return rm.WaitForAppsToBeReadyWithCtx(ctx, namespace) +} + +// WaitForAppsToBeReadyWithCtx waits for all apps in the specified namespace to be ready or +// until the provided context is cancelled. +func (rm *ResourceManager) WaitForAppsToBeReadyWithCtx(ctx context.Context, namespace string) error { + if err := rm.WaitForPodsToBeReady(ctx, namespace); err != nil { return err } @@ -252,7 +294,9 @@ func (rm *ResourceManager) WaitForAppsToBeReady(namespace string) error { return rm.waitForGatewaysToBeReady(ctx, namespace) } -func (rm *ResourceManager) waitForPodsToBeReady(ctx context.Context, namespace string) error { +// WaitForPodsToBeReady waits for all Pods in the specified namespace to be ready or +// until the provided context is cancelled. +func (rm *ResourceManager) WaitForPodsToBeReady(ctx context.Context, namespace string) error { return wait.PollUntilContextCancel( ctx, 500*time.Millisecond, @@ -447,6 +491,37 @@ func (rm *ResourceManager) GetPodNames(namespace string, labels client.MatchingL return names, nil } +// GetPods returns all Pods in the specified namespace that match the given labels. +func (rm *ResourceManager) GetPods(namespace string, labels client.MatchingLabels) ([]core.Pod, error) { + ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout) + defer cancel() + + var podList core.PodList + if err := rm.K8sClient.List( + ctx, + &podList, + client.InNamespace(namespace), + labels, + ); err != nil { + return nil, fmt.Errorf("error getting list of Pods: %w", err) + } + + return podList.Items, nil +} + +// GetPod returns the Pod in the specified namespace with the given name. +func (rm *ResourceManager) GetPod(namespace, name string) (*core.Pod, error) { + ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout) + defer cancel() + + var pod core.Pod + if err := rm.K8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, &pod); err != nil { + return nil, fmt.Errorf("error getting Pod: %w", err) + } + + return &pod, nil +} + // GetPodLogs returns the logs from the specified Pod func (rm *ResourceManager) GetPodLogs(namespace, name string, opts *core.PodLogOptions) (string, error) { ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout) @@ -494,6 +569,24 @@ func (rm *ResourceManager) GetNGFDeployment(namespace, releaseName string) (*app return &deployment, nil } +// ScaleDeployment scales the Deployment to the specified number of replicas. 
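+//
+// For example, the scale test uses it to grow the backend Deployment to 648 replicas (sketch):
+//
+//	if err := rm.ScaleDeployment("scale", "backend", 648); err != nil {
+//		return err
+//	}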
+func (rm *ResourceManager) ScaleDeployment(namespace, name string, replicas int32) error { + ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.UpdateTimeout) + defer cancel() + + var deployment apps.Deployment + if err := rm.K8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, &deployment); err != nil { + return fmt.Errorf("error getting Deployment: %w", err) + } + + deployment.Spec.Replicas = &replicas + if err := rm.K8sClient.Update(ctx, &deployment); err != nil { + return fmt.Errorf("error updating Deployment: %w", err) + } + + return nil +} + // GetReadyNGFPodNames returns the name(s) of the NGF Pod(s). func GetReadyNGFPodNames( k8sClient client.Client, diff --git a/tests/framework/results.go b/tests/framework/results.go index 7d5b8ad2e..0e02a0037 100644 --- a/tests/framework/results.go +++ b/tests/framework/results.go @@ -1,6 +1,7 @@ package framework import ( + "encoding/csv" "fmt" "io" "os" @@ -64,24 +65,48 @@ func WriteSystemInfoToFile(file *os.File, ci ClusterInfo, plus bool) error { return nil } -// GeneratePNG generates a PNG using gnuplot. -func GeneratePNG(resultsDir, inputFilename, outputFilename string) ([]byte, error) { +func generatePNG(resultsDir, inputFilename, outputFilename, configFilename string) error { pwd, err := os.Getwd() if err != nil { - return nil, err + return err } - gnuplotCfg := filepath.Join(filepath.Dir(pwd), "scripts", "requests-plot.gp") + gnuplotCfg := filepath.Join(filepath.Dir(pwd), "scripts", configFilename) files := fmt.Sprintf("inputfile='%s';outputfile='%s'", inputFilename, outputFilename) cmd := exec.Command("gnuplot", "-e", files, "-c", gnuplotCfg) cmd.Dir = resultsDir - return cmd.CombinedOutput() + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to generate PNG: %w; output: %s", err, string(output)) + } + + return nil +} + +// GenerateRequestsPNG generates a Requests PNG using gnuplot. +func GenerateRequestsPNG(resultsDir, inputFilename, outputFilename string) error { + return generatePNG(resultsDir, inputFilename, outputFilename, "requests-plot.gpgp") +} + +// GenerateTTRPNG generates a TTR PNG using gnuplot. +func GenerateTTRPNG(resultsDir, inputFilename, outputFilename string) error { + return generatePNG(resultsDir, inputFilename, outputFilename, "ttr-plot.gp") +} + +// GenerateCPUPNG generates a CPU usage PNG using gnuplot. +func GenerateCPUPNG(resultsDir, inputFilename, outputFilename string) error { + return generatePNG(resultsDir, inputFilename, outputFilename, "cpu-plot.gp") } -// WriteResults writes the vegeta metrics results to the results file in text format. -func WriteResults(resultsFile *os.File, metrics *Metrics) error { +// GenerateMemoryPNG generates a Memory usage PNG using gnuplot. +func GenerateMemoryPNG(resultsDir, inputFilename, outputFilename string) error { + return generatePNG(resultsDir, inputFilename, outputFilename, "memory-plot.gp") +} + +// WriteVegetaResults writes the vegeta metrics results to the results file in text format. +func WriteVegetaResults(resultsFile *os.File, metrics *Metrics) error { reporter := vegeta.NewTextReporter(&metrics.Metrics) return reporter.Report(resultsFile) @@ -96,7 +121,27 @@ func WriteContent(resultsFile *os.File, content string) error { return nil } -// NewCSVEncoder returns a vegeta CSV encoder. -func NewCSVEncoder(w io.Writer) vegeta.Encoder { +// NewVegetaCSVEncoder returns a vegeta CSV encoder. 
+func NewVegetaCSVEncoder(w io.Writer) vegeta.Encoder { return vegeta.NewCSVEncoder(w) } + +// NewCSVResultsWriter creates and returns a CSV results file and writer. +func NewCSVResultsWriter(resultsDir, fileName string, resultHeaders ...string) (*os.File, *csv.Writer, error) { + if err := os.MkdirAll(resultsDir, 0o750); err != nil { + return nil, nil, err + } + + file, err := os.OpenFile(filepath.Join(resultsDir, fileName), os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o644) + if err != nil { + return nil, nil, err + } + + writer := csv.NewWriter(file) + + if err = writer.Write(resultHeaders); err != nil { + return nil, nil, err + } + + return file, writer, nil +} diff --git a/tests/framework/timeout.go b/tests/framework/timeout.go index 2aee2e5a2..4b837c757 100644 --- a/tests/framework/timeout.go +++ b/tests/framework/timeout.go @@ -6,6 +6,9 @@ type TimeoutConfig struct { // CreateTimeout represents the maximum time for a Kubernetes object to be created. CreateTimeout time.Duration + // UpdateTimeout represents the maximum time for a Kubernetes object to be updated. + UpdateTimeout time.Duration + // DeleteTimeout represents the maximum time for a Kubernetes object to be deleted. DeleteTimeout time.Duration @@ -29,6 +32,7 @@ type TimeoutConfig struct { func DefaultTimeoutConfig() TimeoutConfig { return TimeoutConfig{ CreateTimeout: 60 * time.Second, + UpdateTimeout: 60 * time.Second, DeleteTimeout: 10 * time.Second, GetTimeout: 10 * time.Second, ManifestFetchTimeout: 10 * time.Second, diff --git a/tests/scale/manifests/prom-clusterrole.yaml b/tests/scale/manifests/prom-clusterrole.yaml deleted file mode 100644 index f8aefdd36..000000000 --- a/tests/scale/manifests/prom-clusterrole.yaml +++ /dev/null @@ -1,44 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: prom ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: prometheus - namespace: prom ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: prometheus - namespace: prom -rules: -- apiGroups: [""] - resources: - - nodes - - services - - endpoints - - pods - verbs: ["get", "list", "watch"] -- apiGroups: [""] - resources: - - configmaps - verbs: ["get"] -- nonResourceURLs: ["/metrics"] - verbs: ["get"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: prometheus - namespace: prom -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: prometheus -subjects: -- kind: ServiceAccount - name: prometheus - namespace: prom diff --git a/tests/scale/scale_test.go b/tests/scale/scale_test.go deleted file mode 100644 index 89f87a1d8..000000000 --- a/tests/scale/scale_test.go +++ /dev/null @@ -1,259 +0,0 @@ -//go:build scale -// +build scale - -package scale - -import ( - "context" - "crypto/tls" - "encoding/csv" - "flag" - "fmt" - "net/http" - "os" - "os/exec" - "path/filepath" - "strconv" - "strings" - "testing" - "time" - - "k8s.io/apimachinery/pkg/util/wait" -) - -// testing flags -var ( - numIterations = flag.Int("i", 1, "number of times to scale the resource") - delay = flag.Duration("delay", 0, "delay between each scaling iteration") - version = flag.String("version", "1.2.0", "version of NGF under test") - plus = flag.Bool("plus", false, "nginx-plus enabled") -) - -func TestScale_Listeners(t *testing.T) { - ip := getIP(t) - url := fmt.Sprintf("http://%s/", ip) - - runScaleTest( - t, - []string{"# Listeners", "Time to Ready (s)", "Error"}, - func(dir string) error { - return generateScaleListenerManifests(*numIterations, dir, false 
/*non-tls*/) - }, - url, - ) -} - -func TestScale_HTTPSListeners(t *testing.T) { - ip := getIP(t) - url := fmt.Sprintf("https://%s/", ip) - - runScaleTest( - t, - []string{"# HTTPS Listeners", "Time to Ready (s)", "Error"}, - func(dir string) error { - return generateScaleListenerManifests(*numIterations, dir, true /*tls*/) - }, - url, - ) -} - -func TestScale_HTTPRoutes(t *testing.T) { - ip := getIP(t) - url := fmt.Sprintf("http://%s/", ip) - - runScaleTest( - t, - []string{"# HTTPRoutes", "Time to Ready (s)", "Error"}, - func(dir string) error { - return generateScaleHTTPRouteManifests(*numIterations, dir) - }, - url, - ) -} - -func runScaleTest( - t *testing.T, - resultHeaders []string, - generateManifests func(dir string) error, - url string, -) { - t.Helper() - manifestDir := t.Name() - - writer := newResultsWriter(t, t.Name(), resultHeaders...) - - if err := generateManifests(manifestDir); err != nil { - t.Fatalf("failed to generate manifests: %s", err) - } - - startTime := time.Now() - startUnix := fmt.Sprintf("%d", startTime.Unix()) - - if err := kubectlApply(getPrereqDirName(manifestDir)); err != nil { - t.Fatalf("failed to apply prerequisite resources: %s", err) - } - - t.Log("Waiting for all Pods to be Ready") - if err := kubectlWaitAllPodsReady(); err != nil { - t.Fatalf("failed to wait for all Pods to be Ready: %s", err) - } - - for i := 0; i < *numIterations; i++ { - t.Logf("Scaling up to %d resources", i) - - manifestFile := filepath.Join(manifestDir, fmt.Sprintf("manifest-%d.yaml", i)) - - if err := kubectlApply(manifestFile); err != nil { - t.Errorf("failed to scale up: %s", err) - } - - host := fmt.Sprintf("%d.example.com", i) - - t.Logf("Sending request to url %s with host %s...", url, host) - - ttr, err := waitForResponseForHost(url, host) - - seconds := ttr.Seconds() - record := []string{strconv.Itoa(i + 1), strconv.FormatFloat(seconds, 'f', -1, 64)} - if err != nil { - record = append(record, err.Error()) - } - - if err = writer.Write(record); err != nil { - t.Fatalf("failed to write time to ready to csv file: %s", err) - } - - time.Sleep(*delay) - } - - endTime := time.Now() - endUnix := fmt.Sprintf("%d", endTime.Unix()) - - // This accounts for prometheus 10s scraping window - endUnixPlusTen := fmt.Sprintf("%d", endTime.Add(10*time.Second).Unix()) - - records := [][]string{ - {"Test Start", "Test End", "Test End + 10s", "Duration"}, - {startUnix, endUnix, endUnixPlusTen, endTime.Sub(startTime).String()}, - } - - if err := writer.WriteAll(records); err != nil { - t.Logf("failed to write records to csv") - } -} - -func getIP(t *testing.T) string { - t.Helper() - - ip := os.Getenv("NGF_IP") - if ip == "" { - t.Fatalf("NGF_IP env var not set") - } - - return ip -} - -func newResultsWriter(t *testing.T, testName string, resultHeaders ...string) *csv.Writer { - t.Helper() - - versionDir := filepath.Join("results", *version) - if err := os.Mkdir(versionDir, 0o750); err != nil && !os.IsExist(err) { - t.Fatalf("failed to create results version directory: %s", err) - } - - testDirName := testName - if *plus { - testDirName += "_Plus" - } - - dir := filepath.Join(versionDir, testDirName) - if err := os.Mkdir(dir, 0o750); err != nil { - t.Fatalf("failed to create results test directory: %s", err) - } - - file, err := os.Create(filepath.Join(dir, "results.csv")) - if err != nil { - t.Fatalf("failed to create results csv file: %s", err) - } - - writer := csv.NewWriter(file) - - if err = writer.Write(resultHeaders); err != nil { - t.Fatalf("failed to write headers to csv file: 
%s", err) - } - - t.Cleanup(func() { - writer.Flush() - _ = file.Close() - }) - - return writer -} - -func kubectlApply(filename string) error { - if err := kubectlExec("apply", "-f", filename); err != nil { - return fmt.Errorf("error applying %s: %w", filename, err) - } - - return nil -} - -func kubectlWaitAllPodsReady() error { - if err := kubectlExec("wait", "pod", "--all", "--for=condition=Ready"); err != nil { - return fmt.Errorf("error waiting for all pods to be ready:%w", err) - } - - return nil -} - -func kubectlExec(arg ...string) error { - cmd := exec.Command("kubectl", arg...) - - return cmd.Run() -} - -func waitForResponseForHost(url, host string) (time.Duration, error) { - client := &http.Client{} - - if strings.HasPrefix(url, "https") { - customTransport := http.DefaultTransport.(*http.Transport) - customTransport.TLSClientConfig = &tls.Config{ - InsecureSkipVerify: true, // nolint: gosec - ServerName: host, - } - client.Transport = customTransport - } - - ctx, cancel := context.WithTimeout(context.Background(), time.Minute) - defer cancel() - - req, err := http.NewRequestWithContext(ctx, "GET", url, nil) - if err != nil { - return 0, err - } - - req.Host = host - - start := time.Now() - - err = wait.PollUntilContextCancel( - ctx, - 200*time.Millisecond, - true, - func(ctx context.Context) (done bool, err error) { - resp, err := client.Do(req) - if err != nil { - fmt.Println("Retrying GET request", "error", err) - return false, err - } - - if resp.StatusCode == http.StatusOK { - return true, nil - } - - fmt.Println("Retrying GET request", "host", host, "status", resp.Status) - return false, nil - }) - - return time.Since(start), err -} diff --git a/tests/scripts/cpu-plot.gp b/tests/scripts/cpu-plot.gp new file mode 100644 index 000000000..71584811a --- /dev/null +++ b/tests/scripts/cpu-plot.gp @@ -0,0 +1,20 @@ +set terminal png size 800,600 +set title "CPU Usage" +set datafile separator "," +set output outputfile . "" + +# X-axis settings +set xlabel "Timestamp" +set xdata time +set timefmt "%s" +set format x "%M:%S" +set xrange [*:*] +set xtics nomirror + +# Y-axis settings +set yrange [0:*] +set ylabel "CPU Usage (core seconds)" +set format y "%.2f" + +# Plotting data +plot inputfile using 1:2 with lines lw 2 notitle diff --git a/tests/scripts/create-gke-cluster.sh b/tests/scripts/create-gke-cluster.sh index 9d034e1c6..b5a2061f8 100644 --- a/tests/scripts/create-gke-cluster.sh +++ b/tests/scripts/create-gke-cluster.sh @@ -6,6 +6,16 @@ ip_random_digit=$((1 + $RANDOM % 250)) IS_CI=${1:-false} +if [ -z "$GKE_MACHINE_TYPE" ]; then + # If the environment variable is not set, use a default value + GKE_MACHINE_TYPE="e2-medium" +fi + +if [ -z "$GKE_NUM_NODES" ]; then + # If the environment variable is not set, use a default value + GKE_NUM_NODES="3" +fi + gcloud container clusters create ${GKE_CLUSTER_NAME} \ --project ${GKE_PROJECT} \ --zone ${GKE_CLUSTER_ZONE} \ @@ -16,7 +26,9 @@ gcloud container clusters create ${GKE_CLUSTER_NAME} \ --master-ipv4-cidr 172.16.${ip_random_digit}.32/28 \ --metadata=block-project-ssh-keys=TRUE \ --monitoring=SYSTEM,POD,DEPLOYMENT \ - --logging=SYSTEM,WORKLOAD + --logging=SYSTEM,WORKLOAD \ + --machine-type ${GKE_MACHINE_TYPE} \ + --num-nodes ${GKE_NUM_NODES} # Add current IP to GKE master control node access, if this script is not invoked during a CI run. 
if [ "${IS_CI}" = "false" ]; then diff --git a/tests/scripts/memory-plot.gp b/tests/scripts/memory-plot.gp new file mode 100644 index 000000000..c25614db7 --- /dev/null +++ b/tests/scripts/memory-plot.gp @@ -0,0 +1,21 @@ +# Define a function to convert bytes to Mebibytes +bytes_to_MiB(bytes) = bytes / (1024.0 * 1024.0) + +set terminal png size 800,600 +set title "Memory Usage" +set datafile separator "," +set output outputfile . "" + +# X-axis settings +set xlabel "Timestamp" +set xdata time +set timefmt "%s" +set format x "%M:%S" +set xrange [*:*] # Specify a range covering all timestamps + +# Y-axis settings +set yrange [0:*] +set ylabel "Memory Usage (MiB)" + +# Plotting data +plot inputfile using 1:(bytes_to_MiB($2)) with lines lw 2 notitle diff --git a/tests/scripts/ttr-plot.gp b/tests/scripts/ttr-plot.gp new file mode 100644 index 000000000..97d0eb289 --- /dev/null +++ b/tests/scripts/ttr-plot.gp @@ -0,0 +1,18 @@ +set terminal png size 800,600 +set title "Scaling resources" +set datafile separator "," +set output outputfile . "" + +# X-axis settings +set xrange [0:70] +set xtics 10 +set xlabel "# Resources" +set grid xtics + +# Y-axis settings +set yrange [0:*] +set ylabel "Time to Ready (s)" +set format y "%.1f" + +# Plotting data +plot inputfile using 1:2 with lines lw 2 notitle diff --git a/tests/scripts/vars.env-example b/tests/scripts/vars.env-example index c4163b012..52138f113 100644 --- a/tests/scripts/vars.env-example +++ b/tests/scripts/vars.env-example @@ -20,3 +20,5 @@ SOURCE_IP_RANGE= PLUS_ENABLED= NGF_VERSION= +GKE_MACHINE_TYPE= +GKE_NUM_NODES= diff --git a/tests/suite/dataplane_perf_test.go b/tests/suite/dataplane_perf_test.go index 743e79977..ffb07d85f 100644 --- a/tests/suite/dataplane_perf_test.go +++ b/tests/suite/dataplane_perf_test.go @@ -97,7 +97,7 @@ var _ = Describe("Dataplane performance", Ordered, Label("nfr", "performance"), } _, metrics := framework.RunLoadTest(cfg) - Expect(framework.WriteResults(outFile, &metrics)).To(Succeed()) + Expect(framework.WriteVegetaResults(outFile, &metrics)).To(Succeed()) _, err = fmt.Fprint(outFile, "```\n") Expect(err).ToNot(HaveOccurred()) diff --git a/tests/scale/manifests/scale-matches.yaml b/tests/suite/manifests/scale/matches.yaml similarity index 100% rename from tests/scale/manifests/scale-matches.yaml rename to tests/suite/manifests/scale/matches.yaml diff --git a/tests/scale/manifests/scale-upstreams.yaml b/tests/suite/manifests/scale/upstreams.yaml similarity index 100% rename from tests/scale/manifests/scale-upstreams.yaml rename to tests/suite/manifests/scale/upstreams.yaml diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go new file mode 100644 index 000000000..db87c3259 --- /dev/null +++ b/tests/suite/scale_test.go @@ -0,0 +1,817 @@ +package suite + +import ( + "context" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strconv" + "strings" + "text/template" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/common/model" + core "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ctlr "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/nginxinc/nginx-gateway-fabric/tests/framework" +) + +var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { + // One of the tests - scales upstream servers - requires a big cluster to provision 648 pods. 
+ // On GKE, you can use the following configuration: + // - A Kubernetes cluster with 12 nodes on GKE + // - Node: n2d-standard-16 (16 vCPU, 64GB memory) + + var ( + matchesManifests = []string{ + "scale/matches.yaml", + } + upstreamsManifests = []string{ + "scale/upstreams.yaml", + } + + ns = &core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "scale", + }, + } + + scrapeInterval = 15 * time.Second + queryRangeStep = 15 * time.Second + + resultsDir string + outFile *os.File + ngfPodName string + promInstance framework.PrometheusInstance + promPortForwardStopCh = make(chan struct{}) + ) + + const ( + httpListenerCount = 64 + httpsListenerCount = 64 + httpRouteCount = 1000 + upstreamServerCount = 648 + ) + + BeforeAll(func() { + // Scale tests need a dedicated NGF instance + // Because they analyze the logs of NGF and NGINX, and they don't want to analyze the logs of other tests. + teardown(releaseName) + + Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) + + var err error + resultsDir, err = framework.CreateResultsDir("scale", version) + Expect(err).ToNot(HaveOccurred()) + + filename := filepath.Join(resultsDir, fmt.Sprintf("%s.md", version)) + outFile, err = framework.CreateResultsFile(filename) + Expect(err).ToNot(HaveOccurred()) + Expect(framework.WriteSystemInfoToFile(outFile, clusterInfo, *plusEnabled)).To(Succeed()) + + promCfg := framework.PrometheusConfig{ + ScrapeInterval: scrapeInterval, + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + promInstance, err = framework.InstallPrometheus(ctx, resourceManager, promCfg) + Expect(err).ToNot(HaveOccurred()) + + k8sConfig := ctlr.GetConfigOrDie() + + if !clusterInfo.IsGKE { + Expect(promInstance.PortForward(k8sConfig, promPortForwardStopCh)).To(Succeed()) + } + }) + + BeforeEach(func() { + // Scale tests need a dedicated NGF instance per test. + // Because they analyze the logs of NGF and NGINX, and they don't want to analyze the logs of other tests. 
+ cfg := getDefaultSetupCfg() + labelFilter := GinkgoLabelFilter() + cfg.nfr = isNFR(labelFilter) + setup(cfg) + + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(podNames).To(HaveLen(1)) + ngfPodName = podNames[0] + }) + + createResponseChecker := func(url, address string) func() error { + return func() error { + status, _, err := framework.Get(url, address, timeoutConfig.RequestTimeout) + if err != nil { + return fmt.Errorf("bad response: %w", err) + } + + if status != 200 { + return fmt.Errorf("unexpected status code: %d", status) + } + + return nil + } + } + + createMetricExistChecker := func(query string, getTime func() time.Time, modifyTime func()) func() error { + return func() error { + queryWithTimestamp := fmt.Sprintf("%s @ %d", query, getTime().Unix()) + + result, err := promInstance.Query(queryWithTimestamp) + if err != nil { + return fmt.Errorf("failed to query Prometheus: %w", err) + } + + if result.String() == "" { + modifyTime() + return errors.New("empty result") + } + + return nil + } + } + + createEndTimeFinder := func(query string, startTime time.Time, t *time.Time) func() error { + return func() error { + result, err := promInstance.QueryRange(query, v1.Range{ + Start: startTime, + End: *t, + Step: queryRangeStep, + }) + if err != nil { + return fmt.Errorf("failed to query Prometheus: %w", err) + } + + if result.String() == "" { + *t = time.Now() + return errors.New("empty result") + } + + return nil + } + } + + getFirstValueOfVector := func(query string) float64 { + result, err := promInstance.Query(query) + Expect(err).ToNot(HaveOccurred()) + + val, err := framework.GetFirstValueOfPrometheusVector(result) + Expect(err).ToNot(HaveOccurred()) + + return val + } + + getBuckets := func(query string) []bucket { + result, err := promInstance.Query(query) + Expect(err).ToNot(HaveOccurred()) + + res, ok := result.(model.Vector) + Expect(ok).To(BeTrue()) + + buckets := make([]bucket, 0, len(res)) + + for _, sample := range res { + le := sample.Metric["le"] + val := float64(sample.Value) + bucket := bucket{ + Le: string(le), + Val: int(val), + } + buckets = append(buckets, bucket) + } + + return buckets + } + + checkLogErrors := func(containerName string, substrings []string, fileName string) int { + logs, err := resourceManager.GetPodLogs(ngfNamespace, ngfPodName, &core.PodLogOptions{ + Container: containerName, + }) + Expect(err).ToNot(HaveOccurred()) + + logLines := strings.Split(logs, "\n") + errors := 0 + + outer: + for _, line := range logLines { + for _, substr := range substrings { + if strings.Contains(line, substr) { + errors++ + continue outer + } + } + } + + // attach full logs + if errors > 0 { + f, err := os.Create(fileName) + Expect(err).ToNot(HaveOccurred()) + defer f.Close() + + _, err = io.WriteString(f, logs) + Expect(err).ToNot(HaveOccurred()) + } + return errors + } + + runTestWithMetricsAndLogs := func(testName, testResultsDir string, test func()) { + var ( + metricExistTimeout = 2 * time.Minute + metricExistPolling = 1 * time.Second + ) + + startTime := time.Now() + + // We need to make sure that for the startTime, the metrics exists in Prometheus. + // if they don't exist, we increase the startTime and try again. + // Note: it's important that Polling interval in Eventually is greater than the startTime increment. 
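+		// In this test the polling interval is 1s and each retry advances startTime by only 500ms,
+		// so the adjusted startTime always stays behind the current time while Eventually retries.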
+ + getStartTime := func() time.Time { return startTime } + modifyStartTime := func() { startTime = startTime.Add(500 * time.Millisecond) } + + queries := []string{ + fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), + fmt.Sprintf(`container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}`, ngfPodName), + // We don't need to check all nginx_gateway_fabric_* metrics, as they are collected at the same time + fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`, ngfPodName), + } + + for _, q := range queries { + Eventually( + createMetricExistChecker( + q, + getStartTime, + modifyStartTime, + ), + ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) + } + + test() + + // We sleep for 2 scape intervals to ensure that Prometheus scrapes the metrics after the test() finishes + // before endTime, so that we don't lose any metric values like reloads. + time.Sleep(2 * scrapeInterval) + + endTime := time.Now() + + // Now we check that Prometheus has the metrics for the endTime + + // If the test duration is small (which can happen if you run the test with small number of resources), + // the rate query may not return any data. + // To ensure it returns data, we increase the startTime. + Eventually( + createEndTimeFinder( + fmt.Sprintf(`rate(container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}[2m])`, ngfPodName), + startTime, + &endTime, + ), + ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) + + getEndTime := func() time.Time { return endTime } + noOpModifier := func() {} + + queries = []string{ + fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), + // We don't need to check all nginx_gateway_fabric_* metrics, as they are collected at the same time + fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`, ngfPodName), + } + + for _, q := range queries { + Eventually( + createMetricExistChecker( + q, + getEndTime, + noOpModifier, + ), + ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) + } + + // Collect metric values + // For some metrics, generate PNGs + + result, err := promInstance.QueryRange( + fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), + v1.Range{ + Start: startTime, + End: endTime, + Step: queryRangeStep, + }, + ) + Expect(err).ToNot(HaveOccurred()) + + memCSV := filepath.Join(testResultsDir, "memory.csv") + + Expect(framework.WritePrometheusMatrixToCSVFile(memCSV, result)).To(Succeed()) + + Expect( + framework.GenerateMemoryPNG(testResultsDir, memCSV, "memory.png"), + ).To(Succeed()) + + Expect(os.Remove(memCSV)).To(Succeed()) + + result, err = promInstance.QueryRange( + fmt.Sprintf(`rate(container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}[2m])`, ngfPodName), + v1.Range{ + Start: startTime, + End: endTime, + Step: queryRangeStep, + }, + ) + Expect(err).ToNot(HaveOccurred()) + + cpuCSV := filepath.Join(testResultsDir, "cpu.csv") + + Expect(framework.WritePrometheusMatrixToCSVFile(cpuCSV, result)).To(Succeed()) + + Expect( + framework.GenerateCPUPNG(testResultsDir, cpuCSV, "cpu.png"), + ).To(Succeed()) + + Expect(os.Remove(cpuCSV)).To(Succeed()) + + reloadCount := getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_total{pod="%s"} @ %d`, + ngfPodName, + ngfPodName, + startTime.Unix(), + ), + ) + + 
reloadErrsCount := getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reload_errors_total{pod="%s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reload_errors_total{pod="%s"} @ %d`, + ngfPodName, + ngfPodName, + startTime.Unix(), + ), + ) + + reloadAvgTime := getFirstValueOfVector( + fmt.Sprintf( + `(nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%s"} @ %d)`+ + ` / `+ + `(nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_total{pod="%s"} @ %d)`, + ngfPodName, + ngfPodName, + startTime.Unix(), + ngfPodName, + ngfPodName, + startTime.Unix(), + )) + + reloadBuckets := getBuckets( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%s"} @ %d`, + ngfPodName, + ngfPodName, + startTime.Unix(), + ), + ) + + eventsCount := getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%s"} @ %d`, + ngfPodName, + ngfPodName, + startTime.Unix(), + ), + ) + + eventsAvgTime := getFirstValueOfVector( + fmt.Sprintf( + `(nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%s"} @ %d)`+ + ` / `+ + `(nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%s"} @ %d)`, + ngfPodName, + ngfPodName, + startTime.Unix(), + ngfPodName, + ngfPodName, + startTime.Unix(), + ), + ) + + eventsBuckets := getBuckets( + fmt.Sprintf( + `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%s"} @ %d`, + ngfPodName, + ngfPodName, + startTime.Unix(), + ), + ) + + // Check container logs for errors + + ngfErrors := checkLogErrors( + "nginx-gateway", + []string{"error"}, + filepath.Join(testResultsDir, "ngf.log"), + ) + nginxErrors := checkLogErrors( + "nginx", + []string{"[error]", "[emerg]", "[crit]", "[alert]"}, + filepath.Join(testResultsDir, "nginx.log"), + ) + + // Check container restarts + + pod, err := resourceManager.GetPod(ngfNamespace, ngfPodName) + Expect(err).ToNot(HaveOccurred()) + + findRestarts := func(name string) int { + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.Name == name { + return int(containerStatus.RestartCount) + } + } + Fail(fmt.Sprintf("container %s not found", name)) + return 0 + } + + ngfRestarts := findRestarts("nginx-gateway") + nginxRestarts := findRestarts("nginx") + + // Write results + + results := scaleTestResults{ + Name: testName, + ReloadCount: int(reloadCount), + ReloadErrsCount: int(reloadErrsCount), + ReloadAvgTime: int(reloadAvgTime), + ReloadBuckets: reloadBuckets, + EventsCount: int(eventsCount), + EventsAvgTime: int(eventsAvgTime), + EventsBuckets: eventsBuckets, + NGFErrors: ngfErrors, + NginxErrors: nginxErrors, + NGFContainerRestarts: ngfRestarts, + NginxContainerRestarts: nginxRestarts, + } + + err = writeScaleResults(outFile, results) + Expect(err).ToNot(HaveOccurred()) + } + + runScaleResources := func(objects framework.ScaleObjects, testResultsDir string, protocol string) { + ttrCsvFile, writer, err := framework.NewCSVResultsWriter(testResultsDir, "ttr.csv") 
+ Expect(err).ToNot(HaveOccurred()) + + Expect(resourceManager.Apply(objects.BaseObjects)).To(Succeed()) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + Expect(resourceManager.WaitForPodsToBeReady(ctx, ns.Name)).To(Succeed()) + + for i := 0; i < len(objects.ScaleIterationGroups); i++ { + Expect(resourceManager.Apply(objects.ScaleIterationGroups[i])).To(Succeed()) + + url := fmt.Sprintf("%s://%d.example.com", protocol, i) + + if protocol == "http" && portFwdPort != 0 { + url = fmt.Sprintf("%s://%d.example.com:%d", protocol, i, portFwdPort) + } else if protocol == "https" && portFwdHTTPSPort != 0 { + url = fmt.Sprintf("%s://%d.example.com:%d", protocol, i, portFwdHTTPSPort) + } + + startCheck := time.Now() + + Eventually( + createResponseChecker(url, address), + ).WithTimeout(30 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + + ttr := time.Since(startCheck) + + seconds := ttr.Seconds() + record := []string{strconv.Itoa(i + 1), strconv.FormatFloat(seconds, 'f', -1, 64)} + + Expect(writer.Write(record)).To(Succeed()) + } + + writer.Flush() + Expect(ttrCsvFile.Close()).To(Succeed()) + + Expect( + framework.GenerateTTRPNG(testResultsDir, ttrCsvFile.Name(), "ttr.png"), + ).To(Succeed()) + + Expect(os.Remove(ttrCsvFile.Name())).To(Succeed()) + + Expect(resourceManager.Delete(objects.BaseObjects)).To(Succeed()) + for i := 0; i < len(objects.ScaleIterationGroups); i++ { + Expect(resourceManager.Delete(objects.ScaleIterationGroups[i])).To(Succeed()) + } + } + + runScaleUpstreams := func() { + Expect(resourceManager.ApplyFromFiles(upstreamsManifests, ns.Name)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) + + url := "http://hello.example.com" + if portFwdPort != 0 { + url = fmt.Sprintf("http://hello.example.com:%d", portFwdPort) + } + + Eventually( + createResponseChecker(url, address), + ).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + + Expect( + resourceManager.ScaleDeployment(ns.Name, "backend", upstreamServerCount), + ).To(Succeed()) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + Expect(resourceManager.WaitForPodsToBeReady(ctx, ns.Name)).To(Succeed()) + + Eventually( + createResponseChecker(url, address), + ).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + + Expect( + resourceManager.DeleteFromFiles(upstreamsManifests, ns.Name), + ).To(Succeed()) + } + + setNamespace := func(objects framework.ScaleObjects) { + for _, obj := range objects.BaseObjects { + obj.SetNamespace(ns.Name) + } + for _, objs := range objects.ScaleIterationGroups { + for _, obj := range objs { + obj.SetNamespace(ns.Name) + } + } + } + + It(fmt.Sprintf("scales HTTP listeners to %d", httpListenerCount), func() { + const testName = "TestScale_Listeners" + + testResultsDir := filepath.Join(resultsDir, testName) + Expect(os.MkdirAll(testResultsDir, 0755)).To(Succeed()) + + objects, err := framework.GenerateScaleListenerObjects(httpListenerCount, false /*non-tls*/) + Expect(err).ToNot(HaveOccurred()) + + setNamespace(objects) + + runTestWithMetricsAndLogs( + testName, + testResultsDir, + func() { + runScaleResources( + objects, + testResultsDir, + "http", + ) + }, + ) + + Expect( + os.RemoveAll(filepath.Join(testResultsDir, "manifests")), + ).To(Succeed()) + }) + + It(fmt.Sprintf("scales HTTPS listeners to %d", httpsListenerCount), func() { + const testName = "TestScale_HTTPSListeners" + + testResultsDir 
:= filepath.Join(resultsDir, testName) + Expect(os.MkdirAll(testResultsDir, 0o755)).To(Succeed()) + + objects, err := framework.GenerateScaleListenerObjects(httpsListenerCount, true /*tls*/) + Expect(err).ToNot(HaveOccurred()) + + setNamespace(objects) + + runTestWithMetricsAndLogs( + testName, + testResultsDir, + func() { + runScaleResources( + objects, + testResultsDir, + "https", + ) + }, + ) + + Expect( + os.RemoveAll(filepath.Join(testResultsDir, "manifests")), + ).To(Succeed()) + }) + + It(fmt.Sprintf("scales HTTP routes to %d", httpRouteCount), func() { + const testName = "TestScale_HTTPRoutes" + + testResultsDir := filepath.Join(resultsDir, testName) + Expect(os.MkdirAll(testResultsDir, 0o755)).To(Succeed()) + + objects, err := framework.GenerateScaleHTTPRouteObjects(httpRouteCount) + Expect(err).ToNot(HaveOccurred()) + + setNamespace(objects) + + runTestWithMetricsAndLogs( + testName, + testResultsDir, + func() { + runScaleResources( + objects, + testResultsDir, + "http", + ) + }, + ) + + Expect( + os.RemoveAll(filepath.Join(testResultsDir, "manifests")), + ).To(Succeed()) + }) + + It(fmt.Sprintf("scales upstream servers to %d", upstreamServerCount), func() { + const testName = "TestScale_UpstreamServers" + + testResultsDir := filepath.Join(resultsDir, testName) + Expect(os.MkdirAll(testResultsDir, 0o755)).To(Succeed()) + + runTestWithMetricsAndLogs( + testName, + testResultsDir, + func() { + runScaleUpstreams() + }, + ) + }) + + It("scale HTTP matches", func() { + const testName = "TestScale_HTTPMatches" + + Expect(resourceManager.ApplyFromFiles(matchesManifests, ns.Name)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) + + var port int + if portFwdPort != 0 { + port = portFwdPort + } else { + port = 80 + } + + addr := fmt.Sprintf("%s:%d", address, port) + + baseURL := "http://cafe.example.com" + + text := fmt.Sprintf("\n## Test %s\n\n", testName) + + _, err := fmt.Fprint(outFile, text) + Expect(err).ToNot(HaveOccurred()) + + run := func(t framework.Target) { + cfg := framework.LoadTestConfig{ + Targets: []framework.Target{t}, + Rate: 1000, + Duration: 30 * time.Second, + Description: "First matches", + Proxy: addr, + ServerName: "cafe.example.com", + } + _, metrics := framework.RunLoadTest(cfg) + + _, err = fmt.Fprintln(outFile, "```text") + Expect(err).ToNot(HaveOccurred()) + Expect(framework.WriteVegetaResults(outFile, &metrics)).To(Succeed()) + _, err = fmt.Fprintln(outFile, "```") + Expect(err).ToNot(HaveOccurred()) + } + + run(framework.Target{ + Method: "GET", + URL: fmt.Sprintf("%s%s", baseURL, "/latte"), + Header: map[string][]string{ + "header-1": {"header-1-val"}, + }, + }) + + run(framework.Target{ + Method: "GET", + URL: fmt.Sprintf("%s%s", baseURL, "/latte"), + Header: map[string][]string{ + "header-50": {"header-50-val"}, + }, + }) + + Expect(resourceManager.DeleteFromFiles(matchesManifests, ns.Name)).To(Succeed()) + }) + + AfterEach(func() { + teardown(releaseName) + }) + + AfterAll(func() { + close(promPortForwardStopCh) + Expect(framework.UninstallPrometheus()).To(Succeed()) + Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + Expect(outFile.Close()).To(Succeed()) + + // restoring NGF shared among tests in the suite + cfg := getDefaultSetupCfg() + labelFilter := GinkgoLabelFilter() + cfg.nfr = isNFR(labelFilter) + setup(cfg) + }) +}) + +type bucket struct { + Le string + Val int +} + +type scaleTestResults struct { + Name string + + ReloadCount int + ReloadErrsCount int + ReloadAvgTime int + ReloadBuckets 
[]bucket + + EventsCount int + EventsAvgTime int + EventsBuckets []bucket + + NGFErrors int + NginxErrors int + + NGFContainerRestarts int + NginxContainerRestarts int +} + +const scaleResultTemplate = ` +## Test {{ .Name }} + +### Reloads + +- Total: {{ .ReloadCount }} +- Total Errors: {{ .ReloadErrsCount }} +- Average Time: {{ .ReloadAvgTime }}ms +- Reload distribution: +{{- range .ReloadBuckets }} + - {{ .Le }}ms: {{ .Val }} +{{- end }} + +### Event Batch Processing + +- Total: {{ .EventsCount }} +- Average Time: {{ .EventsAvgTime }}ms +- Event Batch Processing distribution: +{{- range .EventsBuckets }} + - {{ .Le }}ms: {{ .Val }} +{{- end }} + +### Errors + +- NGF errors: {{ .NGFErrors }} +- NGF container restarts: {{ .NGFContainerRestarts }} +- NGINX errors: {{ .NginxErrors }} +- NGINX container restarts: {{ .NginxContainerRestarts }} + +### Graphs and Logs + +See [output directory](./{{ .Name }}) for more details. +The logs are attached only if there are errors. +` + +func writeScaleResults(dest io.Writer, results scaleTestResults) error { + tmpl, err := template.New("results").Parse(scaleResultTemplate) + if err != nil { + return err + } + + return tmpl.Execute(dest, results) +} diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index 7c1218d15..fad17d8e4 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -4,6 +4,7 @@ import ( "context" "embed" "flag" + "fmt" "path" "path/filepath" "runtime" @@ -64,8 +65,9 @@ var ( manifests embed.FS k8sClient client.Client resourceManager framework.ResourceManager - portForwardStopCh = make(chan struct{}, 1) + portForwardStopCh chan struct{} portFwdPort int + portFwdHTTPSPort int timeoutConfig framework.TimeoutConfig localChartPath string address string @@ -75,8 +77,10 @@ var ( ) const ( - releaseName = "ngf-test" - ngfNamespace = "nginx-gateway" + releaseName = "ngf-test" + ngfNamespace = "nginx-gateway" + ngfHTTPForwardedPort = 10080 + ngfHTTPSForwardedPort = 10443 ) type setupConfig struct { @@ -179,8 +183,12 @@ func setup(cfg setupConfig, extraInstallArgs ...string) { Expect(podNames).ToNot(BeEmpty()) if *serviceType != "LoadBalancer" { - portFwdPort, err = framework.PortForward(k8sConfig, installCfg.Namespace, podNames[0], portForwardStopCh) + ports := []string{fmt.Sprintf("%d:80", ngfHTTPForwardedPort), fmt.Sprintf("%d:443", ngfHTTPSForwardedPort)} + portForwardStopCh = make(chan struct{}) + err = framework.PortForward(k8sConfig, installCfg.Namespace, podNames[0], ports, portForwardStopCh) address = "127.0.0.1" + portFwdPort = ngfHTTPForwardedPort + portFwdHTTPSPort = ngfHTTPSForwardedPort } else { address, err = resourceManager.GetLBIPAddress(installCfg.Namespace) } @@ -189,7 +197,9 @@ func setup(cfg setupConfig, extraInstallArgs ...string) { func teardown(relName string) { if portFwdPort != 0 { - portForwardStopCh <- struct{}{} + close(portForwardStopCh) + portFwdPort = 0 + portFwdHTTPSPort = 0 } cfg := framework.InstallationConfig{ @@ -242,15 +252,18 @@ var _ = BeforeSuite(func() { cfg.nfr = isNFR(labelFilter) // Skip deployment if: - // - running upgrade test (this test will deploy its own version) - // - running longevity teardown (deployment will already exist) - // - running telemetry test (NGF will be deployed as part of the test) - if strings.Contains(labelFilter, "upgrade") || - strings.Contains(labelFilter, "longevity-teardown") || - strings.Contains(labelFilter, "telemetry") || - strings.Contains(labelFilter, "graceful-recovery") { - - cfg.deploy = false + 
skipSubstrings := []string{ + "upgrade", // - running upgrade test (this test will deploy its own version) + "longevity-teardown", // - running longevity teardown (deployment will already exist) + "telemetry", // - running telemetry test (NGF will be deployed as part of the test) + "scale", // - running scale test (this test will deploy its own version) + "graceful-recovery", // - running graceful recovery test (this test will deploy its own version) + } + for _, s := range skipSubstrings { + if strings.Contains(labelFilter, s) { + cfg.deploy = false + break + } } // use a different release name for longevity to allow us to filter on a specific label when collecting diff --git a/tests/suite/upgrade_test.go b/tests/suite/upgrade_test.go index 09b3ecb8c..a6fe32b2b 100644 --- a/tests/suite/upgrade_test.go +++ b/tests/suite/upgrade_test.go @@ -157,7 +157,7 @@ var _ = Describe("Upgrade testing", Label("nfr", "upgrade"), func() { } buf := new(bytes.Buffer) - encoder := framework.NewCSVEncoder(buf) + encoder := framework.NewVegetaCSVEncoder(buf) for _, res := range results { res := res Expect(encoder.Encode(&res)).To(Succeed()) @@ -173,8 +173,9 @@ var _ = Describe("Upgrade testing", Label("nfr", "upgrade"), func() { csvFile.Close() pngName := framework.CreateResultsFilename("png", scheme, *plusEnabled) - output, err := framework.GeneratePNG(resultsDir, csvName, pngName) - Expect(err).ToNot(HaveOccurred(), string(output)) + Expect( + framework.GenerateRequestsPNG(resultsDir, csvName, pngName), + ).To(Succeed()) metricsCh <- &metricsRes }(test) @@ -251,7 +252,7 @@ var _ = Describe("Upgrade testing", Label("nfr", "upgrade"), func() { _, err := fmt.Fprint(resultsFile, res.testName) Expect(err).ToNot(HaveOccurred()) - Expect(framework.WriteResults(resultsFile, res.metrics)).To(Succeed()) + Expect(framework.WriteVegetaResults(resultsFile, res.metrics)).To(Succeed()) link := fmt.Sprintf("\n\n![%[1]v.png](%[1]v.png)\n", res.scheme) if *plusEnabled { From 449699bf65ac005d24c17a425bee453afab19db5 Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 10:50:52 -0400 Subject: [PATCH 02/21] requests-plot.gpgp -> requests-plot.gp --- tests/framework/results.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/framework/results.go b/tests/framework/results.go index 0e02a0037..795cbaf2a 100644 --- a/tests/framework/results.go +++ b/tests/framework/results.go @@ -87,7 +87,7 @@ func generatePNG(resultsDir, inputFilename, outputFilename, configFilename strin // GenerateRequestsPNG generates a Requests PNG using gnuplot. func GenerateRequestsPNG(resultsDir, inputFilename, outputFilename string) error { - return generatePNG(resultsDir, inputFilename, outputFilename, "requests-plot.gpgp") + return generatePNG(resultsDir, inputFilename, outputFilename, "requests-plot.gp") } // GenerateTTRPNG generates a TTR PNG using gnuplot. 
From abff461978531c4a7bae22ff97c8e2253ba25e02 Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 10:53:50 -0400 Subject: [PATCH 03/21] WriteVegetaResults -> WriteMetricsResults --- tests/framework/results.go | 4 ++-- tests/suite/dataplane_perf_test.go | 2 +- tests/suite/scale_test.go | 2 +- tests/suite/upgrade_test.go | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/framework/results.go b/tests/framework/results.go index 795cbaf2a..87d363b08 100644 --- a/tests/framework/results.go +++ b/tests/framework/results.go @@ -105,8 +105,8 @@ func GenerateMemoryPNG(resultsDir, inputFilename, outputFilename string) error { return generatePNG(resultsDir, inputFilename, outputFilename, "memory-plot.gp") } -// WriteVegetaResults writes the vegeta metrics results to the results file in text format. -func WriteVegetaResults(resultsFile *os.File, metrics *Metrics) error { +// WriteMetricsResults writes the metrics results to the results file in text format. +func WriteMetricsResults(resultsFile *os.File, metrics *Metrics) error { reporter := vegeta.NewTextReporter(&metrics.Metrics) return reporter.Report(resultsFile) diff --git a/tests/suite/dataplane_perf_test.go b/tests/suite/dataplane_perf_test.go index ffb07d85f..27b465939 100644 --- a/tests/suite/dataplane_perf_test.go +++ b/tests/suite/dataplane_perf_test.go @@ -97,7 +97,7 @@ var _ = Describe("Dataplane performance", Ordered, Label("nfr", "performance"), } _, metrics := framework.RunLoadTest(cfg) - Expect(framework.WriteVegetaResults(outFile, &metrics)).To(Succeed()) + Expect(framework.WriteMetricsResults(outFile, &metrics)).To(Succeed()) _, err = fmt.Fprint(outFile, "```\n") Expect(err).ToNot(HaveOccurred()) diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index db87c3259..47483810f 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -706,7 +706,7 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { _, err = fmt.Fprintln(outFile, "```text") Expect(err).ToNot(HaveOccurred()) - Expect(framework.WriteVegetaResults(outFile, &metrics)).To(Succeed()) + Expect(framework.WriteMetricsResults(outFile, &metrics)).To(Succeed()) _, err = fmt.Fprintln(outFile, "```") Expect(err).ToNot(HaveOccurred()) } diff --git a/tests/suite/upgrade_test.go b/tests/suite/upgrade_test.go index a6fe32b2b..89e785f4f 100644 --- a/tests/suite/upgrade_test.go +++ b/tests/suite/upgrade_test.go @@ -252,7 +252,7 @@ var _ = Describe("Upgrade testing", Label("nfr", "upgrade"), func() { _, err := fmt.Fprint(resultsFile, res.testName) Expect(err).ToNot(HaveOccurred()) - Expect(framework.WriteVegetaResults(resultsFile, res.metrics)).To(Succeed()) + Expect(framework.WriteMetricsResults(resultsFile, res.metrics)).To(Succeed()) link := fmt.Sprintf("\n\n![%[1]v.png](%[1]v.png)\n", res.scheme) if *plusEnabled { From d01ddf38efe7bfeddbee1364e79495721ab6165a Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 11:15:26 -0400 Subject: [PATCH 04/21] Support Plus in test output files --- tests/suite/scale_test.go | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index 47483810f..7cc4784a3 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -72,7 +72,7 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { resultsDir, err = framework.CreateResultsDir("scale", version) Expect(err).ToNot(HaveOccurred()) - filename := filepath.Join(resultsDir, 
fmt.Sprintf("%s.md", version)) + filename := filepath.Join(resultsDir, framework.CreateResultsFilename("md", version, *plusEnabled)) outFile, err = framework.CreateResultsFile(filename) Expect(err).ToNot(HaveOccurred()) Expect(framework.WriteSystemInfoToFile(outFile, clusterInfo, *plusEnabled)).To(Succeed()) @@ -309,12 +309,12 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ) Expect(err).ToNot(HaveOccurred()) - memCSV := filepath.Join(testResultsDir, "memory.csv") - + memCSV := filepath.Join(testResultsDir, framework.CreateResultsFilename("csv", "memory", *plusEnabled)) Expect(framework.WritePrometheusMatrixToCSVFile(memCSV, result)).To(Succeed()) + memPNG := framework.CreateResultsFilename("png", "memory", *plusEnabled) Expect( - framework.GenerateMemoryPNG(testResultsDir, memCSV, "memory.png"), + framework.GenerateMemoryPNG(testResultsDir, memCSV, memPNG), ).To(Succeed()) Expect(os.Remove(memCSV)).To(Succeed()) @@ -329,12 +329,12 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ) Expect(err).ToNot(HaveOccurred()) - cpuCSV := filepath.Join(testResultsDir, "cpu.csv") - + cpuCSV := filepath.Join(testResultsDir, framework.CreateResultsFilename("csv", "cpu", *plusEnabled)) Expect(framework.WritePrometheusMatrixToCSVFile(cpuCSV, result)).To(Succeed()) + cpuPNG := framework.CreateResultsFilename("png", "cpu", *plusEnabled) Expect( - framework.GenerateCPUPNG(testResultsDir, cpuCSV, "cpu.png"), + framework.GenerateCPUPNG(testResultsDir, cpuCSV, cpuPNG), ).To(Succeed()) Expect(os.Remove(cpuCSV)).To(Succeed()) @@ -434,12 +434,12 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ngfErrors := checkLogErrors( "nginx-gateway", []string{"error"}, - filepath.Join(testResultsDir, "ngf.log"), + filepath.Join(testResultsDir, framework.CreateResultsFilename("log", "ngf", *plusEnabled)), ) nginxErrors := checkLogErrors( "nginx", []string{"[error]", "[emerg]", "[crit]", "[alert]"}, - filepath.Join(testResultsDir, "nginx.log"), + filepath.Join(testResultsDir, framework.CreateResultsFilename("log", "nginx", *plusEnabled)), ) // Check container restarts @@ -482,7 +482,8 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { } runScaleResources := func(objects framework.ScaleObjects, testResultsDir string, protocol string) { - ttrCsvFile, writer, err := framework.NewCSVResultsWriter(testResultsDir, "ttr.csv") + ttrCsvFileName := framework.CreateResultsFilename("csv", "ttr", *plusEnabled) + ttrCsvFile, writer, err := framework.NewCSVResultsWriter(testResultsDir, ttrCsvFileName) Expect(err).ToNot(HaveOccurred()) Expect(resourceManager.Apply(objects.BaseObjects)).To(Succeed()) @@ -519,8 +520,9 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { writer.Flush() Expect(ttrCsvFile.Close()).To(Succeed()) + ttrPNG := framework.CreateResultsFilename("png", "ttr", *plusEnabled) Expect( - framework.GenerateTTRPNG(testResultsDir, ttrCsvFile.Name(), "ttr.png"), + framework.GenerateTTRPNG(testResultsDir, ttrCsvFile.Name(), ttrPNG), ).To(Succeed()) Expect(os.Remove(ttrCsvFile.Name())).To(Succeed()) From 1d793f88184a8c8562e05150d5ab1c9c461185dc Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 11:18:53 -0400 Subject: [PATCH 05/21] Simplify config creation --- tests/suite/scale_test.go | 6 ++---- tests/suite/system_suite_test.go | 5 +++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index 7cc4784a3..909a2dcb7 100644 --- 
a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -98,8 +98,7 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { // Scale tests need a dedicated NGF instance per test. // Because they analyze the logs of NGF and NGINX, and they don't want to analyze the logs of other tests. cfg := getDefaultSetupCfg() - labelFilter := GinkgoLabelFilter() - cfg.nfr = isNFR(labelFilter) + cfg.nfr = true setup(cfg) podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) @@ -744,8 +743,7 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { // restoring NGF shared among tests in the suite cfg := getDefaultSetupCfg() - labelFilter := GinkgoLabelFilter() - cfg.nfr = isNFR(labelFilter) + cfg.nfr = true setup(cfg) }) }) diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index fad17d8e4..0dd0b3f4b 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -256,8 +256,8 @@ var _ = BeforeSuite(func() { "upgrade", // - running upgrade test (this test will deploy its own version) "longevity-teardown", // - running longevity teardown (deployment will already exist) "telemetry", // - running telemetry test (NGF will be deployed as part of the test) - "scale", // - running scale test (this test will deploy its own version) "graceful-recovery", // - running graceful recovery test (this test will deploy its own version) + "scale", // - running scale test (this test will deploy its own version) } for _, s := range skipSubstrings { if strings.Contains(labelFilter, s) { @@ -296,5 +296,6 @@ func isNFR(labelFilter string) bool { strings.Contains(labelFilter, "longevity") || strings.Contains(labelFilter, "performance") || strings.Contains(labelFilter, "upgrade") || - strings.Contains(labelFilter, "graceful-recovery") + strings.Contains(labelFilter, "graceful-recovery") || + strings.Contains(labelFilter, "scale") } From e4439f2cb10071ab0eb0fdef81aeaf931797aebe Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 11:29:09 -0400 Subject: [PATCH 06/21] Simplify printfs --- tests/suite/scale_test.go | 49 ++++++++++++++------------------------- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index 909a2dcb7..2b6202a0d 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -340,10 +340,9 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { reloadCount := getFirstValueOfVector( fmt.Sprintf( - `nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`+ + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ ` - `+ - `nginx_gateway_fabric_nginx_reloads_total{pod="%s"} @ %d`, - ngfPodName, + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %d`, ngfPodName, startTime.Unix(), ), @@ -351,10 +350,9 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { reloadErrsCount := getFirstValueOfVector( fmt.Sprintf( - `nginx_gateway_fabric_nginx_reload_errors_total{pod="%s"}`+ + `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"}`+ ` - `+ - `nginx_gateway_fabric_nginx_reload_errors_total{pod="%s"} @ %d`, - ngfPodName, + `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"} @ %d`, ngfPodName, startTime.Unix(), ), @@ -362,27 +360,22 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { reloadAvgTime := getFirstValueOfVector( fmt.Sprintf( - 
`(nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%s"}`+ + `(nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"}`+ ` - `+ - `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%s"} @ %d)`+ + `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ ` / `+ - `(nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`+ + `(nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ ` - `+ - `nginx_gateway_fabric_nginx_reloads_total{pod="%s"} @ %d)`, - ngfPodName, - ngfPodName, - startTime.Unix(), - ngfPodName, + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %[2]d)`, ngfPodName, startTime.Unix(), )) reloadBuckets := getBuckets( fmt.Sprintf( - `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%s"}`+ + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"}`+ ` - `+ - `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%s"} @ %d`, - ngfPodName, + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"} @ %d`, ngfPodName, startTime.Unix(), ), @@ -390,10 +383,9 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { eventsCount := getFirstValueOfVector( fmt.Sprintf( - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%s"}`+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%s"} @ %d`, - ngfPodName, + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %d`, ngfPodName, startTime.Unix(), ), @@ -401,17 +393,13 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { eventsAvgTime := getFirstValueOfVector( fmt.Sprintf( - `(nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%s"}`+ + `(nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"}`+ ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%s"} @ %d)`+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ ` / `+ - `(nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%s"}`+ + `(nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%s"} @ %d)`, - ngfPodName, - ngfPodName, - startTime.Unix(), - ngfPodName, + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %[2]d)`, ngfPodName, startTime.Unix(), ), @@ -419,10 +407,9 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { eventsBuckets := getBuckets( fmt.Sprintf( - `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%s"}`+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"}`+ ` - `+ - `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%s"} @ %d`, - ngfPodName, + `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"} @ %d`, ngfPodName, startTime.Unix(), ), From 285b95e16eebfcb0e8b71ab8caf2f87ebaff919b Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 11:30:41 -0400 Subject: [PATCH 07/21] Improve defer readability --- tests/suite/scale_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index 2b6202a0d..93a80c0b2 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -476,6 +476,7 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ctx, cancel := 
context.WithTimeout(context.Background(), 2*time.Minute) defer cancel() + Expect(resourceManager.WaitForPodsToBeReady(ctx, ns.Name)).To(Succeed()) for i := 0; i < len(objects.ScaleIterationGroups); i++ { @@ -538,6 +539,7 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) defer cancel() + Expect(resourceManager.WaitForPodsToBeReady(ctx, ns.Name)).To(Succeed()) Eventually( From e6aa42dfc4fb4d0fb4e2a9f2f56a6bb795a3cab4 Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 11:37:43 -0400 Subject: [PATCH 08/21] Clean up namespace for each test to ensure handling any left over objects when a test fails --- tests/suite/scale_test.go | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index 93a80c0b2..68416437c 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -66,8 +66,6 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { // Because they analyze the logs of NGF and NGINX, and they don't want to analyze the logs of other tests. teardown(releaseName) - Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) - var err error resultsDir, err = framework.CreateResultsDir("scale", version) Expect(err).ToNot(HaveOccurred()) @@ -101,6 +99,8 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { cfg.nfr = true setup(cfg) + Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) Expect(err).ToNot(HaveOccurred()) Expect(podNames).To(HaveLen(1)) @@ -471,6 +471,7 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ttrCsvFileName := framework.CreateResultsFilename("csv", "ttr", *plusEnabled) ttrCsvFile, writer, err := framework.NewCSVResultsWriter(testResultsDir, ttrCsvFileName) Expect(err).ToNot(HaveOccurred()) + defer ttrCsvFile.Close() Expect(resourceManager.Apply(objects.BaseObjects)).To(Succeed()) @@ -513,11 +514,6 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ).To(Succeed()) Expect(os.Remove(ttrCsvFile.Name())).To(Succeed()) - - Expect(resourceManager.Delete(objects.BaseObjects)).To(Succeed()) - for i := 0; i < len(objects.ScaleIterationGroups); i++ { - Expect(resourceManager.Delete(objects.ScaleIterationGroups[i])).To(Succeed()) - } } runScaleUpstreams := func() { @@ -545,10 +541,6 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { Eventually( createResponseChecker(url, address), ).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) - - Expect( - resourceManager.DeleteFromFiles(upstreamsManifests, ns.Name), - ).To(Succeed()) } setNamespace := func(objects framework.ScaleObjects) { @@ -716,18 +708,16 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { "header-50": {"header-50-val"}, }, }) - - Expect(resourceManager.DeleteFromFiles(matchesManifests, ns.Name)).To(Succeed()) }) AfterEach(func() { teardown(releaseName) + Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) }) AfterAll(func() { close(promPortForwardStopCh) Expect(framework.UninstallPrometheus()).To(Succeed()) - Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) Expect(outFile.Close()).To(Succeed()) // restoring NGF shared among tests in the suite From 5b41ef9b7d7bcefaab3ec4074d9472d8aab86795 Mon 
Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 11:51:02 -0400 Subject: [PATCH 09/21] Remove unnecessary RemoveAll --- tests/suite/scale_test.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index 68416437c..177e23a9a 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -604,10 +604,6 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ) }, ) - - Expect( - os.RemoveAll(filepath.Join(testResultsDir, "manifests")), - ).To(Succeed()) }) It(fmt.Sprintf("scales HTTP routes to %d", httpRouteCount), func() { @@ -632,10 +628,6 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ) }, ) - - Expect( - os.RemoveAll(filepath.Join(testResultsDir, "manifests")), - ).To(Succeed()) }) It(fmt.Sprintf("scales upstream servers to %d", upstreamServerCount), func() { From 7d8c8019ec792fea633c07519e04b0601079defb Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 11:51:18 -0400 Subject: [PATCH 10/21] scale -> scales --- tests/suite/scale_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index 177e23a9a..62b283572 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -645,7 +645,7 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ) }) - It("scale HTTP matches", func() { + It("scales HTTP matches", func() { const testName = "TestScale_HTTPMatches" Expect(resourceManager.ApplyFromFiles(matchesManifests, ns.Name)).To(Succeed()) From 05acd04a80b774e65c2e3658af9a1fab74d1332b Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 11:52:52 -0400 Subject: [PATCH 11/21] Simplify pod number checking conditions --- tests/framework/prometheus.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/framework/prometheus.go b/tests/framework/prometheus.go index eefca29fe..f943a20ed 100644 --- a/tests/framework/prometheus.go +++ b/tests/framework/prometheus.go @@ -83,12 +83,8 @@ func InstallPrometheus( return PrometheusInstance{}, fmt.Errorf("failed to get Prometheus pods: %w", err) } - if len(pods) == 0 { - return PrometheusInstance{}, errors.New("no Prometheus pods found") - } - - if len(pods) > 1 { - return PrometheusInstance{}, errors.New("multiple Prometheus pods found, expected one") + if len(pods) != 1 { + return PrometheusInstance{}, fmt.Errorf("expected one Prometheus pod, found %d", len(pods)) } pod := pods[0] From 6a6a658226ed4a32b6ca4d12badf3bddab7a0201 Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 11:54:20 -0400 Subject: [PATCH 12/21] Remove unnecessary context --- tests/framework/prometheus.go | 1 - tests/suite/scale_test.go | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/framework/prometheus.go b/tests/framework/prometheus.go index f943a20ed..07204a200 100644 --- a/tests/framework/prometheus.go +++ b/tests/framework/prometheus.go @@ -36,7 +36,6 @@ type PrometheusConfig struct { // InstallPrometheus installs Prometheus in the cluster. // It waits for Prometheus pods to be ready before returning. 
func InstallPrometheus( - ctx context.Context, rm ResourceManager, cfg PrometheusConfig, ) (PrometheusInstance, error) { diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index 62b283572..6e2f343ca 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -79,10 +79,7 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ScrapeInterval: scrapeInterval, } - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) - defer cancel() - - promInstance, err = framework.InstallPrometheus(ctx, resourceManager, promCfg) + promInstance, err = framework.InstallPrometheus(resourceManager, promCfg) Expect(err).ToNot(HaveOccurred()) k8sConfig := ctlr.GetConfigOrDie() From 069ea0bf9769917ed88d4af0883d6b4a21fde9d2 Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 13:20:40 -0400 Subject: [PATCH 13/21] Delete Prometheus namespace --- tests/framework/prometheus.go | 14 +++++++++++++- tests/suite/scale_test.go | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/framework/prometheus.go b/tests/framework/prometheus.go index 07204a200..cb35177b7 100644 --- a/tests/framework/prometheus.go +++ b/tests/framework/prometheus.go @@ -13,6 +13,8 @@ import ( "github.com/prometheus/client_golang/api" v1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/prometheus/common/model" + core "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -108,7 +110,7 @@ func InstallPrometheus( } // UninstallPrometheus uninstalls Prometheus from the cluster. -func UninstallPrometheus() error { +func UninstallPrometheus(rm ResourceManager) error { output, err := exec.Command( "helm", "uninstall", @@ -119,6 +121,16 @@ func UninstallPrometheus() error { return fmt.Errorf("failed to uninstall Prometheus: %w; output: %s", err, string(output)) } + ns := &core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: prometheusNamespace, + }, + } + + if err := rm.Delete([]client.Object{ns}); err != nil { + return fmt.Errorf("failed to delete Prometheus namespace: %w", err) + } + return nil } diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index 6e2f343ca..e9cc4a7d1 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -706,7 +706,7 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { AfterAll(func() { close(promPortForwardStopCh) - Expect(framework.UninstallPrometheus()).To(Succeed()) + Expect(framework.UninstallPrometheus(resourceManager)).To(Succeed()) Expect(outFile.Close()).To(Succeed()) // restoring NGF shared among tests in the suite From e9e02be607952bdb1499768a18d77ff25c20b62d Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 13:24:46 -0400 Subject: [PATCH 14/21] Fix log formatting --- tests/framework/prometheus.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/framework/prometheus.go b/tests/framework/prometheus.go index cb35177b7..15a018bde 100644 --- a/tests/framework/prometheus.go +++ b/tests/framework/prometheus.go @@ -216,7 +216,8 @@ func (ins *PrometheusInstance) QueryWithCtx(ctx context.Context, query string) ( } if len(warnings) > 0 { - slog.Info("Prometheus query returned warnings", + slog.Info( + "Prometheus query returned warnings", "query", query, "warnings", warnings, ) @@ -245,7 +246,8 @@ func (ins *PrometheusInstance) QueryRangeWithCtx(ctx context.Context, query stri } if len(warnings) > 0 { - 
slog.Info("Prometheus range query returned warnings", + slog.Info( + "Prometheus range query returned warnings", "query", query, "range", promRange, "warnings", warnings, From c359c69ef2939317f517cf871771226f92760736 Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 13:26:56 -0400 Subject: [PATCH 15/21] Simplify error handling when updating resources --- tests/framework/resourcemanager.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/framework/resourcemanager.go b/tests/framework/resourcemanager.go index 9b5c19475..e1024ebf8 100644 --- a/tests/framework/resourcemanager.go +++ b/tests/framework/resourcemanager.go @@ -103,10 +103,7 @@ func (rm *ResourceManager) Apply(resources []client.Object) error { return err } resource.SetResourceVersion(obj.GetResourceVersion()) - if err := rm.K8sClient.Update(ctx, resource); err != nil { - return err - } - return nil + return rm.K8sClient.Update(ctx, resource) }) if err != nil { return fmt.Errorf("error updating resource: %w", err) @@ -144,10 +141,7 @@ func (rm *ResourceManager) ApplyFromFiles(files []string, namespace string) erro return err } obj.SetResourceVersion(fetchedObj.GetResourceVersion()) - if err := rm.K8sClient.Update(ctx, &obj); err != nil { - return err - } - return nil + return rm.K8sClient.Update(ctx, &obj) }) if err != nil { return fmt.Errorf("error updating resource: %w", err) From 37ed800886047cb68d6520279d98314e612bbb89 Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 13:32:13 -0400 Subject: [PATCH 16/21] improve (hopefully) port forward logic readability --- tests/suite/scale_test.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index e9cc4a7d1..3a30efb3d 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -480,12 +480,13 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { for i := 0; i < len(objects.ScaleIterationGroups); i++ { Expect(resourceManager.Apply(objects.ScaleIterationGroups[i])).To(Succeed()) - url := fmt.Sprintf("%s://%d.example.com", protocol, i) - + var url string if protocol == "http" && portFwdPort != 0 { url = fmt.Sprintf("%s://%d.example.com:%d", protocol, i, portFwdPort) } else if protocol == "https" && portFwdHTTPSPort != 0 { url = fmt.Sprintf("%s://%d.example.com:%d", protocol, i, portFwdHTTPSPort) + } else { + url = fmt.Sprintf("%s://%d.example.com", protocol, i) } startCheck := time.Now() @@ -517,9 +518,11 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { Expect(resourceManager.ApplyFromFiles(upstreamsManifests, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - url := "http://hello.example.com" + var url string if portFwdPort != 0 { url = fmt.Sprintf("http://hello.example.com:%d", portFwdPort) + } else { + url = "http://hello.example.com" } Eventually( From 9bd87c757d63d13518f50559d3d67b3f2d22ec64 Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 13:37:32 -0400 Subject: [PATCH 17/21] Remove unnecessary RemoveAll 2 --- tests/suite/scale_test.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index 3a30efb3d..0a1b366a6 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -576,10 +576,6 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ) }, ) - - Expect( - os.RemoveAll(filepath.Join(testResultsDir, 
"manifests")), - ).To(Succeed()) }) It(fmt.Sprintf("scales HTTPS listeners to %d", httpsListenerCount), func() { From f1c1e41001e254c4e7aad016ef0723afb05a9bc1 Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 14:59:49 -0400 Subject: [PATCH 18/21] Fix namespace deletion (!) --- tests/framework/prometheus.go | 10 +-------- tests/framework/resourcemanager.go | 32 +++++++++++++++++++++++++++ tests/framework/timeout.go | 4 ++++ tests/suite/dataplane_perf_test.go | 2 +- tests/suite/graceful_recovery_test.go | 2 +- tests/suite/longevity_test.go | 2 +- tests/suite/sample_test.go | 2 +- tests/suite/scale_test.go | 31 +++++++++++++------------- tests/suite/upgrade_test.go | 2 +- 9 files changed, 58 insertions(+), 29 deletions(-) diff --git a/tests/framework/prometheus.go b/tests/framework/prometheus.go index 15a018bde..3afe01521 100644 --- a/tests/framework/prometheus.go +++ b/tests/framework/prometheus.go @@ -13,8 +13,6 @@ import ( "github.com/prometheus/client_golang/api" v1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/prometheus/common/model" - core "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -121,13 +119,7 @@ func UninstallPrometheus(rm ResourceManager) error { return fmt.Errorf("failed to uninstall Prometheus: %w; output: %s", err, string(output)) } - ns := &core.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: prometheusNamespace, - }, - } - - if err := rm.Delete([]client.Object{ns}); err != nil { + if err := rm.DeleteNamespace(prometheusNamespace); err != nil { return fmt.Errorf("failed to delete Prometheus namespace: %w", err) } diff --git a/tests/framework/resourcemanager.go b/tests/framework/resourcemanager.go index e1024ebf8..3cc73d08a 100644 --- a/tests/framework/resourcemanager.go +++ b/tests/framework/resourcemanager.go @@ -167,6 +167,38 @@ func (rm *ResourceManager) Delete(resources []client.Object, opts ...client.Dele return nil } +func (rm *ResourceManager) DeleteNamespace(name string) error { + ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.DeleteNamespaceTimeout) + defer cancel() + + ns := &core.Namespace{} + if err := rm.K8sClient.Get(ctx, types.NamespacedName{Name: name}, ns); err != nil { + if apierrors.IsNotFound(err) { + return nil + } + return fmt.Errorf("error getting namespace: %w", err) + } + + if err := rm.K8sClient.Delete(ctx, ns); err != nil { + return fmt.Errorf("error deleting namespace: %w", err) + } + + // Because the namespace deletion is asynchronous, we need to wait for the namespace to be deleted. + return wait.PollUntilContextCancel( + ctx, + 500*time.Millisecond, + true, /* poll immediately */ + func(ctx context.Context) (bool, error) { + if err := rm.K8sClient.Get(ctx, types.NamespacedName{Name: name}, ns); err != nil { + if apierrors.IsNotFound(err) { + return true, nil + } + return false, fmt.Errorf("error getting namespace: %w", err) + } + return false, nil + }) +} + // DeleteFromFiles deletes Kubernetes resources defined within the provided YAML files. 
func (rm *ResourceManager) DeleteFromFiles(files []string, namespace string) error { handlerFunc := func(obj unstructured.Unstructured) error { diff --git a/tests/framework/timeout.go b/tests/framework/timeout.go index 4b837c757..2ed69457d 100644 --- a/tests/framework/timeout.go +++ b/tests/framework/timeout.go @@ -12,6 +12,9 @@ type TimeoutConfig struct { // DeleteTimeout represents the maximum time for a Kubernetes object to be deleted. DeleteTimeout time.Duration + // DeleteNamespaceTimeout represents the maximum time for a Kubernetes namespace to be deleted. + DeleteNamespaceTimeout time.Duration + // GetTimeout represents the maximum time to get a Kubernetes object. GetTimeout time.Duration @@ -34,6 +37,7 @@ func DefaultTimeoutConfig() TimeoutConfig { CreateTimeout: 60 * time.Second, UpdateTimeout: 60 * time.Second, DeleteTimeout: 10 * time.Second, + DeleteNamespaceTimeout: 60 * time.Second, GetTimeout: 10 * time.Second, ManifestFetchTimeout: 10 * time.Second, RequestTimeout: 10 * time.Second, diff --git a/tests/suite/dataplane_perf_test.go b/tests/suite/dataplane_perf_test.go index 27b465939..3860133e7 100644 --- a/tests/suite/dataplane_perf_test.go +++ b/tests/suite/dataplane_perf_test.go @@ -77,7 +77,7 @@ var _ = Describe("Dataplane performance", Ordered, Label("nfr", "performance"), AfterAll(func() { Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.DeleteNamespace(ns.Name)).To(Succeed()) outFile.Close() }) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index bf6c61295..b4148039e 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -78,7 +78,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov AfterAll(func() { Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.DeleteNamespace(ns.Name)).To(Succeed()) }) It("recovers when NGF container is restarted", func() { diff --git a/tests/suite/longevity_test.go b/tests/suite/longevity_test.go index ec60aa431..2ddf66c44 100644 --- a/tests/suite/longevity_test.go +++ b/tests/suite/longevity_test.go @@ -81,7 +81,7 @@ var _ = Describe("Longevity", Label("longevity-setup", "longevity-teardown"), fu Expect(writeTrafficResults(resultsFile, homeDir, "tea.txt", "HTTPS")).To(Succeed()) Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.DeleteNamespace(ns.Name)).To(Succeed()) }) }) diff --git a/tests/suite/sample_test.go b/tests/suite/sample_test.go index 3996c6764..56d08713a 100644 --- a/tests/suite/sample_test.go +++ b/tests/suite/sample_test.go @@ -34,7 +34,7 @@ var _ = Describe("Basic test example", Label("functional"), func() { AfterEach(func() { Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.DeleteNamespace(ns.Name)).To(Succeed()) }) It("sends traffic", func() { diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index 0a1b366a6..666ab6a83 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -38,11 +38,7 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { "scale/upstreams.yaml", } - ns = &core.Namespace{ - ObjectMeta: 
metav1.ObjectMeta{ - Name: "scale", - }, - } + namespace = "scale" scrapeInterval = 15 * time.Second queryRangeStep = 15 * time.Second @@ -96,6 +92,11 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { cfg.nfr = true setup(cfg) + ns := &core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: namespace, + }, + } Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) @@ -475,7 +476,7 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) defer cancel() - Expect(resourceManager.WaitForPodsToBeReady(ctx, ns.Name)).To(Succeed()) + Expect(resourceManager.WaitForPodsToBeReady(ctx, namespace)).To(Succeed()) for i := 0; i < len(objects.ScaleIterationGroups); i++ { Expect(resourceManager.Apply(objects.ScaleIterationGroups[i])).To(Succeed()) @@ -515,8 +516,8 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { } runScaleUpstreams := func() { - Expect(resourceManager.ApplyFromFiles(upstreamsManifests, ns.Name)).To(Succeed()) - Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) + Expect(resourceManager.ApplyFromFiles(upstreamsManifests, namespace)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(namespace)).To(Succeed()) var url string if portFwdPort != 0 { @@ -530,13 +531,13 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) Expect( - resourceManager.ScaleDeployment(ns.Name, "backend", upstreamServerCount), + resourceManager.ScaleDeployment(namespace, "backend", upstreamServerCount), ).To(Succeed()) ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) defer cancel() - Expect(resourceManager.WaitForPodsToBeReady(ctx, ns.Name)).To(Succeed()) + Expect(resourceManager.WaitForPodsToBeReady(ctx, namespace)).To(Succeed()) Eventually( createResponseChecker(url, address), @@ -545,11 +546,11 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { setNamespace := func(objects framework.ScaleObjects) { for _, obj := range objects.BaseObjects { - obj.SetNamespace(ns.Name) + obj.SetNamespace(namespace) } for _, objs := range objects.ScaleIterationGroups { for _, obj := range objs { - obj.SetNamespace(ns.Name) + obj.SetNamespace(namespace) } } } @@ -644,8 +645,8 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { It("scales HTTP matches", func() { const testName = "TestScale_HTTPMatches" - Expect(resourceManager.ApplyFromFiles(matchesManifests, ns.Name)).To(Succeed()) - Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) + Expect(resourceManager.ApplyFromFiles(matchesManifests, namespace)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(namespace)).To(Succeed()) var port int if portFwdPort != 0 { @@ -700,7 +701,7 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { AfterEach(func() { teardown(releaseName) - Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.DeleteNamespace(namespace)).To(Succeed()) }) AfterAll(func() { diff --git a/tests/suite/upgrade_test.go b/tests/suite/upgrade_test.go index 89e785f4f..e74929207 100644 --- a/tests/suite/upgrade_test.go +++ b/tests/suite/upgrade_test.go @@ -76,7 +76,7 @@ var _ = Describe("Upgrade testing", Label("nfr", "upgrade"), func() { 
AfterEach(func() { Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.DeleteNamespace(ns.Name)).To(Succeed()) resultsFile.Close() }) From 9265787223454b2ce95a14a2bcb5012951e53b8b Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 16:01:58 -0400 Subject: [PATCH 19/21] Fix log checking for NGF for NGINX Plus - ignore usage report errors (those errors ask users to configure usage reporting, which we don't need for tests) --- tests/suite/scale_test.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go index 666ab6a83..8d7c187db 100644 --- a/tests/suite/scale_test.go +++ b/tests/suite/scale_test.go @@ -190,7 +190,12 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { return buckets } - checkLogErrors := func(containerName string, substrings []string, fileName string) int { + checkLogErrors := func( + containerName string, + substrings []string, + ignoredSubstrings []string, + fileName string, + ) int { logs, err := resourceManager.GetPodLogs(ngfNamespace, ngfPodName, &core.PodLogOptions{ Container: containerName, }) @@ -201,6 +206,11 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { outer: for _, line := range logLines { + for _, substr := range ignoredSubstrings { + if strings.Contains(line, substr) { + continue outer + } + } for _, substr := range substrings { if strings.Contains(line, substr) { errors++ @@ -418,11 +428,13 @@ var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { ngfErrors := checkLogErrors( "nginx-gateway", []string{"error"}, + []string{`"logger":"usageReporter`}, // ignore usageReporter errors filepath.Join(testResultsDir, framework.CreateResultsFilename("log", "ngf", *plusEnabled)), ) nginxErrors := checkLogErrors( "nginx", []string{"[error]", "[emerg]", "[crit]", "[alert]"}, + nil, filepath.Join(testResultsDir, framework.CreateResultsFilename("log", "nginx", *plusEnabled)), ) From 700190355406175c12d4ff3197b74bb85b600339 Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Thu, 16 May 2024 16:02:36 -0400 Subject: [PATCH 20/21] Fix Makefile test target to allow NGINX Plus image --- tests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Makefile b/tests/Makefile index 2eb920342..947fe6812 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -107,7 +107,7 @@ test: ## Runs the functional tests on your default k8s cluster --label-filter "functional" $(GINKGO_FLAGS) ./suite -- \ --gateway-api-version=$(GW_API_VERSION) --gateway-api-prev-version=$(GW_API_PREV_VERSION) \ --image-tag=$(TAG) --version-under-test=$(NGF_VERSION) \ - --plus-enabled=$(PLUS_ENABLED) --ngf-image-repo=$(PREFIX) --nginx-image-repo=$(NGINX_PREFIX) \ + --plus-enabled=$(PLUS_ENABLED) --ngf-image-repo=$(PREFIX) --nginx-image-repo=$(NGINX_PREFIX) --nginx-plus-image-repo=$(NGINX_PLUS_PREFIX) \ --pull-policy=$(PULL_POLICY) --k8s-version=$(K8S_VERSION) --service-type=$(GW_SERVICE_TYPE) \ --is-gke-internal-lb=$(GW_SVC_GKE_INTERNAL) From 0a4680ee9fde37eb4a8f107917ed3a70a8fbad81 Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Fri, 17 May 2024 15:42:40 -0400 Subject: [PATCH 21/21] move the scale results and delete the scale folder --- .../results => results/scale}/1.0.0/1.0.0.md | 0 .../TestScale_HTTPRoutes/CPU-no-delay.png | Bin .../scale}/1.0.0/TestScale_HTTPRoutes/CPU.png | Bin 
.../TestScale_HTTPRoutes/Memory-no-delay.png | Bin .../1.0.0/TestScale_HTTPRoutes/Memory.png | Bin .../scale}/1.0.0/TestScale_HTTPRoutes/TTR.png | Bin .../TestScale_HTTPRoutes/results-no-delay.csv | 0 .../1.0.0/TestScale_HTTPRoutes/results.csv | 0 .../1.0.0/TestScale_HTTPSListeners/CPU.png | Bin .../1.0.0/TestScale_HTTPSListeners/Memory.png | Bin .../1.0.0/TestScale_HTTPSListeners/TTR.png | Bin .../TestScale_HTTPSListeners/results.csv | 0 .../scale}/1.0.0/TestScale_Listeners/CPU.png | Bin .../1.0.0/TestScale_Listeners/Memory.png | Bin .../scale}/1.0.0/TestScale_Listeners/TTR.png | Bin .../1.0.0/TestScale_Listeners/results.csv | 0 .../1.0.0/TestScale_UpstreamServers/CPU.png | Bin .../TestScale_UpstreamServers/Memory.png | Bin .../results => results/scale}/1.1.0/1.1.0.md | 0 .../TestScale_HTTPRoutes/CPU-no-delay.png | Bin .../scale}/1.1.0/TestScale_HTTPRoutes/CPU.png | Bin .../TestScale_HTTPRoutes/Memory-no-delay.png | Bin .../1.1.0/TestScale_HTTPRoutes/Memory.png | Bin .../scale}/1.1.0/TestScale_HTTPRoutes/TTR.png | Bin .../TestScale_HTTPRoutes/results-no-delay.csv | 0 .../1.1.0/TestScale_HTTPRoutes/results.csv | 0 .../1.1.0/TestScale_HTTPSListeners/CPU.png | Bin .../1.1.0/TestScale_HTTPSListeners/Memory.png | Bin .../1.1.0/TestScale_HTTPSListeners/TTR.png | Bin .../TestScale_HTTPSListeners/results.csv | 0 .../scale}/1.1.0/TestScale_Listeners/CPU.png | Bin .../1.1.0/TestScale_Listeners/Memory.png | Bin .../scale}/1.1.0/TestScale_Listeners/TTR.png | Bin .../1.1.0/TestScale_Listeners/results.csv | 0 .../1.1.0/TestScale_UpstreamServers/CPU.png | Bin .../TestScale_UpstreamServers/Memory.png | Bin .../results => results/scale}/1.2.0/1.2.0.md | 0 .../scale}/1.2.0/TestScale_HTTPRoutes/CPU.png | Bin .../1.2.0/TestScale_HTTPRoutes/Memory.png | Bin .../TestScale_HTTPRoutes/TTR-without-peak.png | Bin .../scale}/1.2.0/TestScale_HTTPRoutes/TTR.png | Bin .../1.2.0/TestScale_HTTPRoutes_Plus/CPU.png | Bin .../TestScale_HTTPRoutes_Plus/Memory.png | Bin .../1.2.0/TestScale_HTTPRoutes_Plus/TTR.png | Bin .../1.2.0/TestScale_HTTPSListeners/CPU.png | Bin .../1.2.0/TestScale_HTTPSListeners/Memory.png | Bin .../1.2.0/TestScale_HTTPSListeners/TTR.png | Bin .../TestScale_HTTPSListeners_Plus/CPU.png | Bin .../TestScale_HTTPSListeners_Plus/Memory.png | Bin .../TestScale_HTTPSListeners_Plus/TTR.png | Bin .../scale}/1.2.0/TestScale_Listeners/CPU.png | Bin .../1.2.0/TestScale_Listeners/Memory.png | Bin .../scale}/1.2.0/TestScale_Listeners/TTR.png | Bin .../1.2.0/TestScale_Listeners_Plus/CPU.png | Bin .../1.2.0/TestScale_Listeners_Plus/Memory.png | Bin .../1.2.0/TestScale_Listeners_Plus/TTR.png | Bin .../1.2.0/TestScale_UpstreamServers/CPU.png | Bin .../TestScale_UpstreamServers/Memory.png | Bin .../TestScale_UpstreamServers_Plus/CPU.png | Bin .../TestScale_UpstreamServers_Plus/Memory.png | Bin tests/scale/scale.md | 527 ------------------ 61 files changed, 527 deletions(-) rename tests/{scale/results => results/scale}/1.0.0/1.0.0.md (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPRoutes/CPU-no-delay.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPRoutes/CPU.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPRoutes/Memory-no-delay.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPRoutes/Memory.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPRoutes/TTR.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPRoutes/results-no-delay.csv (100%) rename tests/{scale/results 
=> results/scale}/1.0.0/TestScale_HTTPRoutes/results.csv (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPSListeners/CPU.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPSListeners/Memory.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPSListeners/TTR.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPSListeners/results.csv (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_Listeners/CPU.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_Listeners/Memory.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_Listeners/TTR.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_Listeners/results.csv (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_UpstreamServers/CPU.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_UpstreamServers/Memory.png (100%) rename tests/{scale/results => results/scale}/1.1.0/1.1.0.md (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPRoutes/CPU-no-delay.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPRoutes/CPU.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPRoutes/Memory-no-delay.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPRoutes/Memory.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPRoutes/TTR.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPRoutes/results-no-delay.csv (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPRoutes/results.csv (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPSListeners/CPU.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPSListeners/Memory.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPSListeners/TTR.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPSListeners/results.csv (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_Listeners/CPU.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_Listeners/Memory.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_Listeners/TTR.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_Listeners/results.csv (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_UpstreamServers/CPU.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_UpstreamServers/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/1.2.0.md (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes/TTR-without-peak.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes/TTR.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes_Plus/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes_Plus/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes_Plus/TTR.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPSListeners/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPSListeners/Memory.png (100%) rename 
tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPSListeners/TTR.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPSListeners_Plus/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPSListeners_Plus/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPSListeners_Plus/TTR.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_Listeners/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_Listeners/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_Listeners/TTR.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_Listeners_Plus/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_Listeners_Plus/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_Listeners_Plus/TTR.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_UpstreamServers/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_UpstreamServers/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_UpstreamServers_Plus/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_UpstreamServers_Plus/Memory.png (100%) delete mode 100644 tests/scale/scale.md diff --git a/tests/scale/results/1.0.0/1.0.0.md b/tests/results/scale/1.0.0/1.0.0.md similarity index 100% rename from tests/scale/results/1.0.0/1.0.0.md rename to tests/results/scale/1.0.0/1.0.0.md diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/CPU-no-delay.png b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/CPU-no-delay.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPRoutes/CPU-no-delay.png rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/CPU-no-delay.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/CPU.png b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/CPU.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPRoutes/CPU.png rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/CPU.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/Memory-no-delay.png b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/Memory-no-delay.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPRoutes/Memory-no-delay.png rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/Memory-no-delay.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/Memory.png b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/Memory.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPRoutes/Memory.png rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/Memory.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/TTR.png b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/TTR.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPRoutes/TTR.png rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/TTR.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/results-no-delay.csv b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/results-no-delay.csv similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPRoutes/results-no-delay.csv rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/results-no-delay.csv diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/results.csv b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/results.csv similarity index 100% rename from 
tests/scale/results/1.0.0/TestScale_HTTPRoutes/results.csv rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/results.csv diff --git a/tests/scale/results/1.0.0/TestScale_HTTPSListeners/CPU.png b/tests/results/scale/1.0.0/TestScale_HTTPSListeners/CPU.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPSListeners/CPU.png rename to tests/results/scale/1.0.0/TestScale_HTTPSListeners/CPU.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPSListeners/Memory.png b/tests/results/scale/1.0.0/TestScale_HTTPSListeners/Memory.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPSListeners/Memory.png rename to tests/results/scale/1.0.0/TestScale_HTTPSListeners/Memory.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPSListeners/TTR.png b/tests/results/scale/1.0.0/TestScale_HTTPSListeners/TTR.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPSListeners/TTR.png rename to tests/results/scale/1.0.0/TestScale_HTTPSListeners/TTR.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPSListeners/results.csv b/tests/results/scale/1.0.0/TestScale_HTTPSListeners/results.csv similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPSListeners/results.csv rename to tests/results/scale/1.0.0/TestScale_HTTPSListeners/results.csv diff --git a/tests/scale/results/1.0.0/TestScale_Listeners/CPU.png b/tests/results/scale/1.0.0/TestScale_Listeners/CPU.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_Listeners/CPU.png rename to tests/results/scale/1.0.0/TestScale_Listeners/CPU.png diff --git a/tests/scale/results/1.0.0/TestScale_Listeners/Memory.png b/tests/results/scale/1.0.0/TestScale_Listeners/Memory.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_Listeners/Memory.png rename to tests/results/scale/1.0.0/TestScale_Listeners/Memory.png diff --git a/tests/scale/results/1.0.0/TestScale_Listeners/TTR.png b/tests/results/scale/1.0.0/TestScale_Listeners/TTR.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_Listeners/TTR.png rename to tests/results/scale/1.0.0/TestScale_Listeners/TTR.png diff --git a/tests/scale/results/1.0.0/TestScale_Listeners/results.csv b/tests/results/scale/1.0.0/TestScale_Listeners/results.csv similarity index 100% rename from tests/scale/results/1.0.0/TestScale_Listeners/results.csv rename to tests/results/scale/1.0.0/TestScale_Listeners/results.csv diff --git a/tests/scale/results/1.0.0/TestScale_UpstreamServers/CPU.png b/tests/results/scale/1.0.0/TestScale_UpstreamServers/CPU.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_UpstreamServers/CPU.png rename to tests/results/scale/1.0.0/TestScale_UpstreamServers/CPU.png diff --git a/tests/scale/results/1.0.0/TestScale_UpstreamServers/Memory.png b/tests/results/scale/1.0.0/TestScale_UpstreamServers/Memory.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_UpstreamServers/Memory.png rename to tests/results/scale/1.0.0/TestScale_UpstreamServers/Memory.png diff --git a/tests/scale/results/1.1.0/1.1.0.md b/tests/results/scale/1.1.0/1.1.0.md similarity index 100% rename from tests/scale/results/1.1.0/1.1.0.md rename to tests/results/scale/1.1.0/1.1.0.md diff --git a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/CPU-no-delay.png b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/CPU-no-delay.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/CPU-no-delay.png rename to 
tests/results/scale/1.1.0/TestScale_HTTPRoutes/CPU-no-delay.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/CPU.png b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/CPU.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/CPU.png rename to tests/results/scale/1.1.0/TestScale_HTTPRoutes/CPU.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/Memory-no-delay.png b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/Memory-no-delay.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/Memory-no-delay.png rename to tests/results/scale/1.1.0/TestScale_HTTPRoutes/Memory-no-delay.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/Memory.png b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/Memory.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/Memory.png rename to tests/results/scale/1.1.0/TestScale_HTTPRoutes/Memory.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/TTR.png b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/TTR.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/TTR.png rename to tests/results/scale/1.1.0/TestScale_HTTPRoutes/TTR.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/results-no-delay.csv b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/results-no-delay.csv similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/results-no-delay.csv rename to tests/results/scale/1.1.0/TestScale_HTTPRoutes/results-no-delay.csv diff --git a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/results.csv b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/results.csv similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/results.csv rename to tests/results/scale/1.1.0/TestScale_HTTPRoutes/results.csv diff --git a/tests/scale/results/1.1.0/TestScale_HTTPSListeners/CPU.png b/tests/results/scale/1.1.0/TestScale_HTTPSListeners/CPU.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPSListeners/CPU.png rename to tests/results/scale/1.1.0/TestScale_HTTPSListeners/CPU.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPSListeners/Memory.png b/tests/results/scale/1.1.0/TestScale_HTTPSListeners/Memory.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPSListeners/Memory.png rename to tests/results/scale/1.1.0/TestScale_HTTPSListeners/Memory.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPSListeners/TTR.png b/tests/results/scale/1.1.0/TestScale_HTTPSListeners/TTR.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPSListeners/TTR.png rename to tests/results/scale/1.1.0/TestScale_HTTPSListeners/TTR.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPSListeners/results.csv b/tests/results/scale/1.1.0/TestScale_HTTPSListeners/results.csv similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPSListeners/results.csv rename to tests/results/scale/1.1.0/TestScale_HTTPSListeners/results.csv diff --git a/tests/scale/results/1.1.0/TestScale_Listeners/CPU.png b/tests/results/scale/1.1.0/TestScale_Listeners/CPU.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_Listeners/CPU.png rename to tests/results/scale/1.1.0/TestScale_Listeners/CPU.png diff --git a/tests/scale/results/1.1.0/TestScale_Listeners/Memory.png b/tests/results/scale/1.1.0/TestScale_Listeners/Memory.png similarity index 100% rename from 
tests/scale/results/1.1.0/TestScale_Listeners/Memory.png rename to tests/results/scale/1.1.0/TestScale_Listeners/Memory.png diff --git a/tests/scale/results/1.1.0/TestScale_Listeners/TTR.png b/tests/results/scale/1.1.0/TestScale_Listeners/TTR.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_Listeners/TTR.png rename to tests/results/scale/1.1.0/TestScale_Listeners/TTR.png diff --git a/tests/scale/results/1.1.0/TestScale_Listeners/results.csv b/tests/results/scale/1.1.0/TestScale_Listeners/results.csv similarity index 100% rename from tests/scale/results/1.1.0/TestScale_Listeners/results.csv rename to tests/results/scale/1.1.0/TestScale_Listeners/results.csv diff --git a/tests/scale/results/1.1.0/TestScale_UpstreamServers/CPU.png b/tests/results/scale/1.1.0/TestScale_UpstreamServers/CPU.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_UpstreamServers/CPU.png rename to tests/results/scale/1.1.0/TestScale_UpstreamServers/CPU.png diff --git a/tests/scale/results/1.1.0/TestScale_UpstreamServers/Memory.png b/tests/results/scale/1.1.0/TestScale_UpstreamServers/Memory.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_UpstreamServers/Memory.png rename to tests/results/scale/1.1.0/TestScale_UpstreamServers/Memory.png diff --git a/tests/scale/results/1.2.0/1.2.0.md b/tests/results/scale/1.2.0/1.2.0.md similarity index 100% rename from tests/scale/results/1.2.0/1.2.0.md rename to tests/results/scale/1.2.0/1.2.0.md diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes/CPU.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes/CPU.png rename to tests/results/scale/1.2.0/TestScale_HTTPRoutes/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes/Memory.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes/Memory.png rename to tests/results/scale/1.2.0/TestScale_HTTPRoutes/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes/TTR-without-peak.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes/TTR-without-peak.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes/TTR-without-peak.png rename to tests/results/scale/1.2.0/TestScale_HTTPRoutes/TTR-without-peak.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes/TTR.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes/TTR.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes/TTR.png rename to tests/results/scale/1.2.0/TestScale_HTTPRoutes/TTR.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes_Plus/CPU.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes_Plus/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes_Plus/CPU.png rename to tests/results/scale/1.2.0/TestScale_HTTPRoutes_Plus/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes_Plus/Memory.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes_Plus/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes_Plus/Memory.png rename to tests/results/scale/1.2.0/TestScale_HTTPRoutes_Plus/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes_Plus/TTR.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes_Plus/TTR.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes_Plus/TTR.png rename to 
tests/results/scale/1.2.0/TestScale_HTTPRoutes_Plus/TTR.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPSListeners/CPU.png b/tests/results/scale/1.2.0/TestScale_HTTPSListeners/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPSListeners/CPU.png rename to tests/results/scale/1.2.0/TestScale_HTTPSListeners/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPSListeners/Memory.png b/tests/results/scale/1.2.0/TestScale_HTTPSListeners/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPSListeners/Memory.png rename to tests/results/scale/1.2.0/TestScale_HTTPSListeners/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPSListeners/TTR.png b/tests/results/scale/1.2.0/TestScale_HTTPSListeners/TTR.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPSListeners/TTR.png rename to tests/results/scale/1.2.0/TestScale_HTTPSListeners/TTR.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPSListeners_Plus/CPU.png b/tests/results/scale/1.2.0/TestScale_HTTPSListeners_Plus/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPSListeners_Plus/CPU.png rename to tests/results/scale/1.2.0/TestScale_HTTPSListeners_Plus/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPSListeners_Plus/Memory.png b/tests/results/scale/1.2.0/TestScale_HTTPSListeners_Plus/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPSListeners_Plus/Memory.png rename to tests/results/scale/1.2.0/TestScale_HTTPSListeners_Plus/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPSListeners_Plus/TTR.png b/tests/results/scale/1.2.0/TestScale_HTTPSListeners_Plus/TTR.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPSListeners_Plus/TTR.png rename to tests/results/scale/1.2.0/TestScale_HTTPSListeners_Plus/TTR.png diff --git a/tests/scale/results/1.2.0/TestScale_Listeners/CPU.png b/tests/results/scale/1.2.0/TestScale_Listeners/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_Listeners/CPU.png rename to tests/results/scale/1.2.0/TestScale_Listeners/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_Listeners/Memory.png b/tests/results/scale/1.2.0/TestScale_Listeners/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_Listeners/Memory.png rename to tests/results/scale/1.2.0/TestScale_Listeners/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_Listeners/TTR.png b/tests/results/scale/1.2.0/TestScale_Listeners/TTR.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_Listeners/TTR.png rename to tests/results/scale/1.2.0/TestScale_Listeners/TTR.png diff --git a/tests/scale/results/1.2.0/TestScale_Listeners_Plus/CPU.png b/tests/results/scale/1.2.0/TestScale_Listeners_Plus/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_Listeners_Plus/CPU.png rename to tests/results/scale/1.2.0/TestScale_Listeners_Plus/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_Listeners_Plus/Memory.png b/tests/results/scale/1.2.0/TestScale_Listeners_Plus/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_Listeners_Plus/Memory.png rename to tests/results/scale/1.2.0/TestScale_Listeners_Plus/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_Listeners_Plus/TTR.png b/tests/results/scale/1.2.0/TestScale_Listeners_Plus/TTR.png similarity index 100% rename from 
tests/scale/results/1.2.0/TestScale_Listeners_Plus/TTR.png rename to tests/results/scale/1.2.0/TestScale_Listeners_Plus/TTR.png diff --git a/tests/scale/results/1.2.0/TestScale_UpstreamServers/CPU.png b/tests/results/scale/1.2.0/TestScale_UpstreamServers/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_UpstreamServers/CPU.png rename to tests/results/scale/1.2.0/TestScale_UpstreamServers/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_UpstreamServers/Memory.png b/tests/results/scale/1.2.0/TestScale_UpstreamServers/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_UpstreamServers/Memory.png rename to tests/results/scale/1.2.0/TestScale_UpstreamServers/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_UpstreamServers_Plus/CPU.png b/tests/results/scale/1.2.0/TestScale_UpstreamServers_Plus/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_UpstreamServers_Plus/CPU.png rename to tests/results/scale/1.2.0/TestScale_UpstreamServers_Plus/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_UpstreamServers_Plus/Memory.png b/tests/results/scale/1.2.0/TestScale_UpstreamServers_Plus/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_UpstreamServers_Plus/Memory.png rename to tests/results/scale/1.2.0/TestScale_UpstreamServers_Plus/Memory.png diff --git a/tests/scale/scale.md b/tests/scale/scale.md deleted file mode 100644 index 328378596..000000000 --- a/tests/scale/scale.md +++ /dev/null @@ -1,527 +0,0 @@ -# Scale Tests - -This document describes how we scale test NGF. - - -- [Scale Tests](#scale-tests) - - [Goals](#goals) - - [Test Environment](#test-environment) - - [Steps](#steps) - - [Setup](#setup) - - [Run the tests](#run-the-tests) - - [Scale Listeners to Max of 64](#scale-listeners-to-max-of-64) - - [Scale HTTPS Listeners to Max of 64](#scale-https-listeners-to-max-of-64) - - [Scale HTTPRoutes](#scale-httproutes) - - [Scale Upstream Servers](#scale-upstream-servers) - - [Scale HTTP Matches](#scale-http-matches) - - [Analyze](#analyze) - - [Results](#results) - - -## Goals - -- Measure how NGF performs when the number of Gateway API and referenced core Kubernetes resources are scaled. -- Test the following number of resources: - - Max number of HTTP and HTTPS Listeners (64) - - Max number of Upstream Servers (648) - - Max number of HTTPMatches - - 1000 HTTPRoutes - -## Test Environment - -For most of the tests, the following cluster will be sufficient: - -- A Kubernetes cluster with 4 nodes on GKE - - Node: n2d-standard-8 (8 vCPU, 32GB memory) - - Enabled GKE logging -- A GKE VM with access to the cluster. Send all requests from the GKE VM. - -The Upstream Server scale test requires a bigger cluster to accommodate the large number of Pods. Those cluster details -are listed in the [Scale Upstream Servers](#scale-upstream-servers) test steps. 
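-
-For reference, a cluster matching the description above can be created with a `gcloud` command roughly like the
-following. This is only a sketch: the cluster name, zone, and project defaults are placeholders and may differ in
-your environment.
-
-```console
-# Sketch only: cluster name and zone are placeholders; adjust for your project.
-gcloud container clusters create ngf-scale-test \
-  --zone us-central1-a \
-  --num-nodes 4 \
-  --machine-type n2d-standard-8 \
-  --logging=SYSTEM,WORKLOAD
-```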
- -## Steps - -### Setup - -- Update the version in the [test](scale_test.go) to the release version - -- Install Gateway API Resources: - - ```console - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.1.0/standard-install.yaml - ``` - -- Install edge NGF: - - For OSS: - - ```console - helm install scale-test oci://ghcr.io/nginxinc/charts/nginx-gateway-fabric --create-namespace --wait -n nginx-gateway --version=0.0.0-edge --set nginxGateway.config.logging.level=debug - ``` - - For Plus: - - - Build the NGINX Plus image - - Push the image to the GCR registry - - ```console - helm install scale-test oci://ghcr.io/nginxinc/charts/nginx-gateway-fabric --create-namespace --wait -n nginx-gateway --version=0.0.0-edge --set nginxGateway.config.logging.level=debug --set nginx.plus=true --set nginx.image.repository= --set nginx.image.tag= - ``` - -- Save the Pod Name and LoadBalancer IP for tests: - - ```console - export NGF_IP=$(kubectl get svc -n nginx-gateway scale-test-nginx-gateway-fabric --output jsonpath='{.status.loadBalancer.ingress[0].ip}') - export NGF_POD=$(kubectl get pods -n nginx-gateway -l "app.kubernetes.io/name=nginx-gateway-fabric,app.kubernetes.io/instance=scale-test" -o jsonpath="{.items[0].metadata.name}") - ``` - -- Install Prometheus: - - ```console - kubectl apply -f manifests/prom-clusterrole.yaml - helm repo add prometheus-community https://prometheus-community.github.io/helm-charts - helm repo update - helm install prom prometheus-community/prometheus --set useExistingClusterRoleName=prometheus -n prom - ``` - -- Create a directory under [results](/tests/scale/results) and name it after the version of NGF you are testing. Then - create a file for the result summary, also named after the NGF version. For - example: [1.0.0.md](/tests/scale/results/1.0.0/1.0.0.md). - -### Run the tests - -Run the following tests for both N+ and OSS. - -#### Scale Listeners to Max of 64 - -Test Goal: Measure how NGF performs as the number of Listeners increases to the max of 64. - -Test Plan: - -- Scale up to 64 HTTP Listeners -- All Listeners are on a single Gateway -- Each Listener has 1 HTTPRoute attached -- Each HTTPRoute references 1 unique Service -- Services and Deployments are created before scaling Listeners. -- After each Listener + HTTPRoute is created, measure the time it takes to get a successful response from the new - route (time to ready). -- Record the time to ready in seconds in a csv file for each iteration. - -Total Resources Created: - -- 1 Gateway with 64 Listeners -- 64 HTTPRoutes -- 64 Services, Deployments, Pods - -Follow the steps below to run the test: - -- Run the test: - - With OSS: - - ```console - go test -v -tags scale -run TestScale_Listeners -i 64 - ``` - - With Plus: - - ```console - go test -v -tags scale -run TestScale_Listeners -i 64 -plus - ``` - -- [Analyze](#analyze) the results. - -- Clean up: - - Delete resources from cluster: - - ```console - kubectl delete -Rf TestScale_Listeners - ``` - - Delete generated manifests: - - ```console - rm -rf TestScale_Listeners - ``` - -- Check for any errors or restarts after cleanup. -- Check NGINX conf to make sure it looks correct. - -#### Scale HTTPS Listeners to Max of 64 - -Test Goal: Measure how NGF performs as the number of HTTPS Listeners increases to the max of 64. 
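-
-While the Listeners scale up, it can help to sanity-check how many Listeners the Gateway currently has. A rough
-sketch (it assumes the Gateway is named `gateway`, as elsewhere in this document):
-
-```console
-# Assumes the Gateway is named "gateway"; prints one Listener name per line and counts them.
-kubectl get gateway gateway -o jsonpath='{.spec.listeners[*].name}' | tr ' ' '\n' | grep -c .
-```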
- -Test Plan: - -- Scale up to 64 HTTPS Listeners -- All Listeners are on a single Gateway -- Each Listener has 1 HTTPRoute attached -- Each Listener references a unique Secret -- Each HTTPRoute references 1 unique Service -- Services, Deployments, and Secrets are created before scaling Listeners -- After each Listener + HTTPRoute is created, measure the time it takes to get a successful response from the new - route (time to ready). -- Record the time to ready in seconds in a csv file for each iteration. - -Total Resources Created: - -- 1 Gateway with 64 HTTPS Listeners -- 64 Secrets -- 64 HTTPRoutes -- 64 Services, Deployments, Pods - -Follow the steps below to run the test: - -- Run the test: - - With OSS: - - ```console - go test -v -tags scale -run TestScale_HTTPSListeners -i 64 - ``` - - With Plus: - - ```console - go test -v -tags scale -run TestScale_HTTPSListeners -i 64 -plus - ``` - -- [Analyze](#analyze) the results. - -- Clean up: - - Delete resources from cluster: - - ```console - kubectl delete -Rf TestScale_HTTPSListeners - ``` - - Delete generated manifests: - - ```console - rm -rf TestScale_HTTPSListeners - ``` - -- Check for any errors or restarts after cleanup. -- Check NGINX conf to make sure it looks correct. - -#### Scale HTTPRoutes - -Test Goal: Measure how NGF performs as the number of HTTPRoutes increases to 1000. - -Test Plan: - -- Scale up to 1000 HTTPRoutes -- All HTTPRoutes attach to a single Gateway with one Listener -- Each HTTPRoute references the same Service -- Service and Deployment are created before scaling HTTPRoutes -- After each HTTPRoute is created, measure the time it takes to get a successful response from the new route (time to - ready). -- Record the time to ready in seconds in a csv file for each iteration. - -Total Resources Created: - -- 1 Gateway with 1 Listener -- 1000 HTTPRoutes -- 1 Service, Deployment, Pod - -Follow the steps below to run the test: - -- Run the test: - - With OSS: - - ```console - go test -v -tags scale -timeout 30m -run TestScale_HTTPRoutes -i 1000 - ``` - - With Plus: - - ```console - go test -v -tags scale -timeout 30m -run TestScale_HTTPRoutes -i 1000 -plus - ``` - - **Optional:** To test with a delay in between each new HTTPRoute, you can add the `-delay` flag to the above command. For example, - to add a 2-second delay: - - ```console - go test -v -tags scale -timeout 60m -run TestScale_HTTPRoutes -i 1000 -delay 2s - ``` - - The test takes longer to run with a delay so make sure to adjust the timeout value. - -- [Analyze](#analyze) the results. - -- Clean up: - - Delete resources from cluster: - - ```console - kubectl delete -Rf TestScale_HTTPRoutes - ``` - - Delete generated manifests: - - ```console - rm -rf TestScale_HTTPRoutes - ``` - -- Check for any errors or restarts after cleanup. -- Check NGINX conf to make sure it looks correct. - -#### Scale Upstream Servers - -Test Goal: Measure how NGF performs as the number of Upstream Servers increases to the max of 648. - -Test Plan: - -- Deploy a single Gateway with 1 Listener and attach one HTTPRoute that references a single Service -- Scale the deployment for that Service to 648 Pods for OSS and 556 Pods for Plus (these are the limits that the upstream zone size allows) -- Gateway, HTTPRoute, Service, and Deployment with 1 replica are created before scaling up to 648 replicas. 
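-
-In addition to watching the Deployment roll out (see the steps below), the number of endpoints Kubernetes has
-registered for the backend Service can be tracked directly; this roughly corresponds to the upstream servers NGF
-will configure. A sketch, assuming the Service is named `backend` like the Deployment:
-
-```console
-# Assumes the Service is named "backend"; approximate count of endpoint addresses (includes not-yet-ready endpoints).
-kubectl get endpointslices -l kubernetes.io/service-name=backend \
-  -o jsonpath='{.items[*].endpoints[*].addresses[*]}' | wc -w
-```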
- -Total Resources Created: - -- 1 Gateway with 1 Listener -- 1 HTTPRoutes -- 1 Service, 1 Deployment, 648 Pods - -Test Environment: - -For this test you must use a much bigger cluster in order to create 648 Pods. - -- A Kubernetes cluster with 12 nodes on GKE - - Node: n2d-standard-16 (16 vCPU, 64GB memory) - - Enabled GKE logging - -Follow the steps below to run the test: - -- Apply manifest - - ```console - kubectl apply -f manifests/scale-upstreams.yaml - ``` - -- Check the status of the Gateway and HTTPRoute to make sure everything is OK before scaling. - - ```console - kubectl describe gateway gateway - kubectl describe httproute route - ``` - -- Get the start time as a UNIX timestamp and record it in the results. - - ```console - date +%s - ``` - - This will be used in the metrics query. - -- Open a new terminal window and start the following loop: - - ```console - for i in $(seq 1 150); do curl --resolve cafe.example.com:80:$NGF_IP http://cafe.example.com:80/; sleep 1; done >> requests.log - ``` - -- Back in your original terminal, scale the backend app: - - For OSS: - - ```console - kubectl scale deploy backend --replicas 648 - ``` - - For Plus: - - ```console - kubectl scale deploy backend --replicas 556 - ``` - -- Wait for all Pods to become available: - - ```console - watch kubectl get deploy backend - ``` - -- Check the NGINX config for 648 upstream servers: - - ```console - kubectl exec -it -n nginx-gateway $NGF_POD -c nginx -- nginx -T | grep -E "server (?:[0-9]{1,3}\.){3}[0-9]{1,3}:8080" | wc -l - ``` - -- Get the end time as a UNIX timestamp and make a note of it: - - ```console - date +%s - ``` - -- In the terminal you started the request loop, kill the loop if it's still running and check the request.log to see if - any of the requests failed. Record any failures in the results file. - -- [Analyze](#analyze) the results. Use the start time and end time you made note of earlier for the - queries. You can calculate the test duration in seconds by subtracting the start time from the end time. - -- Clean up: - - ```console - kubectl delete -f manifests/scale-upstreams.yaml - ``` - -- Check for any errors or restarts after cleanup. -- Check NGINX conf to make sure it looks correct. - -#### Scale HTTP Matches - -Test Goal: Find the difference in latency between the first match and last match for the max length of -the `http_matches` variable. - -Test Plan: - -- Deploy a single Gateway with 1 Listener and attach one HTTPRoute that references a single Service -- Within the HTTPRoute configure the max number of matches (max is determined by the length of the - generated `http_matches` variable (4096 characters)) -- Use `wrk` to send requests to the _first_ match in `http_matches` list and measure the latency -- Use `wrk` to send requests to the _last_ match in `http_matches` list and measure the latency - -Total Resources Created: - -- 1 Gateway with 1 Listener -- 1 HTTPRoute with 7 rules and 50 matches -- 1 Service, 1 Deployment, 1 Pod - -Follow these steps to run the test: - -- Download [wrk](https://github.com/wg/wrk) - -- Apply manifest: - - ```console - kubectl apply -f manifests/scale-matches.yaml - ``` - -- Check the status of the Gateway and HTTPRoute to make sure everything is OK before scaling. - - ```console - kubectl describe gateway gateway - kubectl describe httproute route - ``` - -- Edit your /etc/hosts file and add an entry for "NGF_IP cafe.example.com". 
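-
-  One way to add the entry (a sketch; it assumes `NGF_IP` holds the LoadBalancer IP saved during setup):
-
-  ```console
-  # Appends an entry so cafe.example.com resolves to the NGF LoadBalancer IP.
-  echo "$NGF_IP cafe.example.com" | sudo tee -a /etc/hosts
-  ```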
-
-- Test the first match:
-
-  ```console
-  wrk -t2 -c10 -d30 http://cafe.example.com -H "header-1: header-1-val"
-  ```
-
-- Test the last match:
-
-  ```console
-  wrk -t2 -c10 -d30 http://cafe.example.com -H "header-50: header-50-val"
-  ```
-
-- Copy and paste the results into the results file.
-
-- Clean up:
-
-  ```console
-  kubectl delete -f manifests/scale-matches.yaml
-  ```
-
-### Analyze
-
-- Query Prometheus for reload metrics. To access the Prometheus Server, run:
-
-  ```console
-  export POD_NAME=$(kubectl get pods --namespace prom -l "app.kubernetes.io/name=prometheus,app.kubernetes.io/instance=prom" -o jsonpath="{.items[0].metadata.name}")
-  kubectl --namespace prom port-forward $POD_NAME 9090
-  ```
-
-  To query Prometheus, you can either browse to localhost:9090 or use curl. The following instructions assume you are
-  using the prom GUI.
-
-  > Note:
-  > For the tests that write to a csv file, the `Test Start`, `Test End + 10s`, and `Duration` are at the
-  > end of the results.csv file in the `results/<version>/<test name>` directory.
-  > If you ran the tests with the `plus` flag, the test directory is named `results/<version>/<test name>_Plus`.
-  > We are using `Test End + 10s` in the Prometheus query to account for the 10s scraping interval.
-
-  Total number of reloads:
-
-  ```console
-  nginx_gateway_fabric_nginx_reloads_total - nginx_gateway_fabric_nginx_reloads_total @ <test start time>
-  ```
-
-  Total number of reload errors:
-
-  ```console
-  nginx_gateway_fabric_nginx_reload_errors_total - nginx_gateway_fabric_nginx_reload_errors_total @ <test start time>
-  ```
-
-  Average reload time (ms):
-
-  ```console
-  rate(nginx_gateway_fabric_nginx_reloads_milliseconds_sum[<duration>] @ <test end time + 10s>) /
-  rate(nginx_gateway_fabric_nginx_reloads_milliseconds_count[<duration>] @ <test end time + 10s>)
-  ```
-
-  Reload Time Distribution:
-
-  ```console
-  nginx_gateway_fabric_nginx_reloads_milliseconds_bucket - nginx_gateway_fabric_nginx_reloads_milliseconds_bucket @ <test start time>
-  ```
-
-  Total number of event batches processed:
-
-  ```console
-  nginx_gateway_fabric_event_batch_processing_milliseconds_count - nginx_gateway_fabric_event_batch_processing_milliseconds_count @ <test start time>
-  ```
-
-  Average event batch processing time (ms):
-
-  ```console
-  rate(nginx_gateway_fabric_event_batch_processing_milliseconds_sum[<duration>] @ <test end time + 10s>) /
-  rate(nginx_gateway_fabric_event_batch_processing_milliseconds_count[<duration>] @ <test end time + 10s>)
-  ```
-
-  Event Batch Processing Time Distribution:
-
-  ```console
-  nginx_gateway_fabric_event_batch_processing_milliseconds_bucket - nginx_gateway_fabric_event_batch_processing_milliseconds_bucket @ <test start time>
-  ```
-
-  Record these numbers in a table in the results file.
-
-- Take screenshots of memory and CPU usage in the GKE Dashboard.
-
-  To monitor memory and CPU usage, navigate to Kubernetes Engine > Workloads > filter by the `nginx-gateway` namespace >
-  click on the NGF Pod name. You should see graphs for CPU, Memory, and Disk.
-
-  - Convert the `Start Time` and `End Time` UNIX timestamps to your local date time:
-
-    ```console
-    date -r <UNIX timestamp>
-    ```
-
-  - Create a custom time frame for the graphs in GKE.
-  - Take a screenshot of the CPU and Memory graphs individually. Store them in the `results/<version>/<test name>`
-    directory.
-
-- If the test writes time to ready numbers to a csv, create a time to ready graph.
-  - Use https://chart-studio.plotly.com/create/#/ to plot the time to ready numbers on a graph.
-  - Remove the `"Test Start", "Test End", "Test End + 10s", "Duration"` rows from the bottom of the csv.
-  - Upload the csv file to plotly.
-  - Create a new `Trace`, select `Line` as the type.
-  - Set the Y axis to the Time to Ready column.
-  - Set the X axis to the number of resources column.
-  - Label the graph and take a screenshot.
-  - Store the graph in the `results/<version>/<test name>` directory.
-
-- Check for errors or restarts and record in the results file. File a bug if there are unexpected errors or restarts.
-- Check NGINX conf and make sure it looks correct. File a bug if there is an issue.
-
-### Results
-
-- [1.0.0](/tests/scale/results/1.0.0/1.0.0.md)
-- [1.1.0](/tests/scale/results/1.1.0/1.1.0.md)
-- [1.2.0](/tests/scale/results/1.2.0/1.2.0.md)