From 2d665ab1cc4b0383dd3128d6bd47cdbdaa4b5bde Mon Sep 17 00:00:00 2001 From: Bojan Zelic Date: Mon, 18 Sep 2023 11:24:09 -0700 Subject: [PATCH 1/6] General: metrics server expose apiserver metrics Signed-off-by: Bojan Zelic --- CHANGELOG.md | 1 + cmd/adapter/main.go | 46 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e144e353289..7a4705519db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,6 +61,7 @@ To learn more about active deprecations, we recommend checking [GitHub Discussio - **General**: Adding a changelog validating script to check for formatting and order ([#3190](https://github.com/kedacore/keda/issues/3190)) - **General**: Update golangci-lint version documented in CONTRIBUTING.md since old version doesn't support go 1.20 (N/A) - **General**: Updated AWS SDK and updated all the aws scalers ([#4905](https://github.com/kedacore/keda/issues/4905)) +- **General**: Add apiserver prometheus metrics to keda metric server ([#4460](https://github.com/kedacore/keda/issues/4460)) - **Azure Pod Identity**: Introduce validation to prevent usage of empty identity ID for Azure identity providers ([#4528](https://github.com/kedacore/keda/issues/4528)) - **Prometheus Scaler**: Remove trailing whitespaces in customAuthHeader and customAuthValue ([#4960](https://github.com/kedacore/keda/issues/4960)) - **Pulsar Scaler**: Add support for OAuth extensions ([#4700](https://github.com/kedacore/keda/issues/4700)) diff --git a/cmd/adapter/main.go b/cmd/adapter/main.go index 590c97da7c9..f0591d8e467 100644 --- a/cmd/adapter/main.go +++ b/cmd/adapter/main.go @@ -20,15 +20,21 @@ import ( "context" "flag" "fmt" + "net/http" "os" appsv1 "k8s.io/api/apps/v1" + apimetrics "k8s.io/apiserver/pkg/endpoints/metrics" "k8s.io/client-go/kubernetes/scheme" + kubemetrics "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" "k8s.io/klog/v2" "k8s.io/klog/v2/klogr" ctrl "sigs.k8s.io/controller-runtime" ctrlcache "sigs.k8s.io/controller-runtime/pkg/cache" + ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" "sigs.k8s.io/controller-runtime/pkg/metrics/server" + basecmd "sigs.k8s.io/custom-metrics-apiserver/pkg/cmd" "sigs.k8s.io/custom-metrics-apiserver/pkg/provider" @@ -36,6 +42,7 @@ import ( "github.com/kedacore/keda/v2/pkg/metricsservice" kedaprovider "github.com/kedacore/keda/v2/pkg/provider" kedautil "github.com/kedacore/keda/v2/pkg/util" + "github.com/prometheus/client_golang/prometheus/collectors" ) // Adapter creates External Metrics Provider @@ -96,10 +103,9 @@ func (a *Adapter) makeProvider(ctx context.Context) (provider.ExternalMetricsPro cfg.Burst = adapterClientRequestBurst cfg.DisableCompression = disableCompression - metricsBindAddress := fmt.Sprintf(":%v", metricsAPIServerPort) mgr, err := ctrl.NewManager(cfg, ctrl.Options{ Metrics: server.Options{ - BindAddress: metricsBindAddress, + BindAddress: "0", // disabled since we use our own server to serve metrics }, Scheme: scheme, Cache: ctrlcache.Options{ @@ -131,6 +137,40 @@ func (a *Adapter) makeProvider(ctx context.Context) (provider.ExternalMetricsPro return kedaprovider.NewProvider(ctx, logger, mgr.GetClient(), *grpcClient), stopCh, nil } +func getMetricHandler() http.HandlerFunc { + // Register apiserver metrics in legacy registry + // this contains the apiserver_* metrics + apimetrics.Register() + + // unregister duplicate collectors that are already handled by controller-runtime's registry + legacyregistry.Registerer().Unregister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) + legacyregistry.Registerer().Unregister(collectors.NewGoCollector(collectors.WithGoCollectorRuntimeMetrics(collectors.MetricsAll))) + + // Return handler that serves metrics from both legacy and controller-runtime registry + return func(w http.ResponseWriter, req *http.Request) { + legacyregistry.Handler().ServeHTTP(w, req) + + kubemetrics.HandlerFor(ctrlmetrics.Registry, kubemetrics.HandlerOpts{}).ServeHTTP(w, req) + } +} + +func RunMetricsServer() { + h := getMetricHandler() + mux := http.NewServeMux() + mux.Handle("/metrics", h) + + metricsBindAddress := fmt.Sprintf(":%v", metricsAPIServerPort) + + go func() { + logger.Info("starting /metrics server endpoint") + // nosemgrep: use-tls + err := http.ListenAndServe(metricsBindAddress, mux) + if err != nil { + panic(err) + } + }() +} + // generateDefaultMetricsServiceAddr generates default Metrics Service gRPC Server address based on the current Namespace. // By default the Metrics Service gRPC Server runs in the same namespace on the keda-operator pod. func generateDefaultMetricsServiceAddr() string { @@ -196,6 +236,8 @@ func main() { cmd.WithExternalMetrics(kedaProvider) logger.Info(cmd.Message) + + RunMetricsServer() if err = cmd.Run(stopCh); err != nil { return } From 04a761ace357d3020f13c919764f9890e4f5f2c8 Mon Sep 17 00:00:00 2001 From: Bojan Zelic Date: Tue, 19 Sep 2023 16:26:49 -0700 Subject: [PATCH 2/6] General: metrics server expose apiserver metrics Signed-off-by: Bojan Zelic --- cmd/adapter/main.go | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/cmd/adapter/main.go b/cmd/adapter/main.go index f0591d8e467..5a4f31b631c 100644 --- a/cmd/adapter/main.go +++ b/cmd/adapter/main.go @@ -137,6 +137,7 @@ func (a *Adapter) makeProvider(ctx context.Context) (provider.ExternalMetricsPro return kedaprovider.NewProvider(ctx, logger, mgr.GetClient(), *grpcClient), stopCh, nil } +// getMetricHandler returns a http handler that exposes metrics from controller-runtime and apiserver func getMetricHandler() http.HandlerFunc { // Register apiserver metrics in legacy registry // this contains the apiserver_* metrics @@ -154,21 +155,34 @@ func getMetricHandler() http.HandlerFunc { } } -func RunMetricsServer() { +// RunMetricsServer runs a http listener and handles the /metrics endpoint +func RunMetricsServer(stopCh <-chan struct{}) { h := getMetricHandler() mux := http.NewServeMux() mux.Handle("/metrics", h) - metricsBindAddress := fmt.Sprintf(":%v", metricsAPIServerPort) + server := &http.Server{ + Addr: metricsBindAddress, + Handler: mux, + } + go func() { logger.Info("starting /metrics server endpoint") // nosemgrep: use-tls - err := http.ListenAndServe(metricsBindAddress, mux) + err := server.ListenAndServe() if err != nil { panic(err) } }() + + go func() { + <-stopCh + logger.Info("Shutting down the /metrics server gracefully...") + if err := server.Shutdown(context.TODO()); err != nil { + logger.Error(err, "http server shutdown error") + } + }() } // generateDefaultMetricsServiceAddr generates default Metrics Service gRPC Server address based on the current Namespace. @@ -237,7 +251,8 @@ func main() { logger.Info(cmd.Message) - RunMetricsServer() + RunMetricsServer(stopCh) + if err = cmd.Run(stopCh); err != nil { return } From 3792589fbca5828911d9106b338db3c985e18215 Mon Sep 17 00:00:00 2001 From: Bojan Zelic Date: Tue, 19 Sep 2023 16:34:24 -0700 Subject: [PATCH 3/6] General: metrics server expose apiserver metrics Signed-off-by: Bojan Zelic --- cmd/adapter/main.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmd/adapter/main.go b/cmd/adapter/main.go index 5a4f31b631c..8df9f91550d 100644 --- a/cmd/adapter/main.go +++ b/cmd/adapter/main.go @@ -22,6 +22,7 @@ import ( "fmt" "net/http" "os" + "time" appsv1 "k8s.io/api/apps/v1" apimetrics "k8s.io/apiserver/pkg/endpoints/metrics" @@ -179,7 +180,10 @@ func RunMetricsServer(stopCh <-chan struct{}) { go func() { <-stopCh logger.Info("Shutting down the /metrics server gracefully...") - if err := server.Shutdown(context.TODO()); err != nil { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + if err := server.Shutdown(ctx); err != nil { logger.Error(err, "http server shutdown error") } }() From 8896a024daf06a96802146c0bf18903eee039c4b Mon Sep 17 00:00:00 2001 From: Bojan Zelic Date: Tue, 19 Sep 2023 19:05:37 -0700 Subject: [PATCH 4/6] linting Signed-off-by: Bojan Zelic --- CHANGELOG.md | 3 +-- cmd/adapter/main.go | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a4705519db..9f470f9a4d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,14 +54,13 @@ To learn more about active deprecations, we recommend checking [GitHub Discussio - **Governance**: KEDA transitioned to CNCF Graduated project ([#63](https://github.com/kedacore/governance/issues/63)) ### Improvements - +- **General**: Add apiserver prometheus metrics to keda metric server ([#4460](https://github.com/kedacore/keda/issues/4460)) - **General**: Add more events for user checking ([#796](https://github.com/kedacore/keda/issues/3764)) - **General**: Add ScaledObject/ScaledJob names to output of `kubectl get triggerauthentication/clustertriggerauthentication` ([#796](https://github.com/kedacore/keda/issues/796)) - **General**: Add standalone CRD generation to release workflow ([#2726](https://github.com/kedacore/keda/issues/2726)) - **General**: Adding a changelog validating script to check for formatting and order ([#3190](https://github.com/kedacore/keda/issues/3190)) - **General**: Update golangci-lint version documented in CONTRIBUTING.md since old version doesn't support go 1.20 (N/A) - **General**: Updated AWS SDK and updated all the aws scalers ([#4905](https://github.com/kedacore/keda/issues/4905)) -- **General**: Add apiserver prometheus metrics to keda metric server ([#4460](https://github.com/kedacore/keda/issues/4460)) - **Azure Pod Identity**: Introduce validation to prevent usage of empty identity ID for Azure identity providers ([#4528](https://github.com/kedacore/keda/issues/4528)) - **Prometheus Scaler**: Remove trailing whitespaces in customAuthHeader and customAuthValue ([#4960](https://github.com/kedacore/keda/issues/4960)) - **Pulsar Scaler**: Add support for OAuth extensions ([#4700](https://github.com/kedacore/keda/issues/4700)) diff --git a/cmd/adapter/main.go b/cmd/adapter/main.go index 8df9f91550d..f73959d6f13 100644 --- a/cmd/adapter/main.go +++ b/cmd/adapter/main.go @@ -24,6 +24,7 @@ import ( "os" "time" + "github.com/prometheus/client_golang/prometheus/collectors" appsv1 "k8s.io/api/apps/v1" apimetrics "k8s.io/apiserver/pkg/endpoints/metrics" "k8s.io/client-go/kubernetes/scheme" @@ -35,7 +36,6 @@ import ( ctrlcache "sigs.k8s.io/controller-runtime/pkg/cache" ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" "sigs.k8s.io/controller-runtime/pkg/metrics/server" - basecmd "sigs.k8s.io/custom-metrics-apiserver/pkg/cmd" "sigs.k8s.io/custom-metrics-apiserver/pkg/provider" @@ -43,7 +43,6 @@ import ( "github.com/kedacore/keda/v2/pkg/metricsservice" kedaprovider "github.com/kedacore/keda/v2/pkg/provider" kedautil "github.com/kedacore/keda/v2/pkg/util" - "github.com/prometheus/client_golang/prometheus/collectors" ) // Adapter creates External Metrics Provider From 89551ba14755d2d2428883082e3e9b493ec67a3e Mon Sep 17 00:00:00 2001 From: Bojan Zelic Date: Wed, 20 Sep 2023 11:27:22 -0700 Subject: [PATCH 5/6] General: metrics server expose apiserver metrics Signed-off-by: Bojan Zelic --- cmd/adapter/main.go | 9 ++--- .../prometheus_metrics_test.go | 39 ++++++++++++++++++- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/cmd/adapter/main.go b/cmd/adapter/main.go index f73959d6f13..aed48156043 100644 --- a/cmd/adapter/main.go +++ b/cmd/adapter/main.go @@ -22,7 +22,6 @@ import ( "fmt" "net/http" "os" - "time" "github.com/prometheus/client_golang/prometheus/collectors" appsv1 "k8s.io/api/apps/v1" @@ -156,7 +155,7 @@ func getMetricHandler() http.HandlerFunc { } // RunMetricsServer runs a http listener and handles the /metrics endpoint -func RunMetricsServer(stopCh <-chan struct{}) { +func RunMetricsServer(ctx context.Context, stopCh <-chan struct{}) { h := getMetricHandler() mux := http.NewServeMux() mux.Handle("/metrics", h) @@ -171,7 +170,7 @@ func RunMetricsServer(stopCh <-chan struct{}) { logger.Info("starting /metrics server endpoint") // nosemgrep: use-tls err := server.ListenAndServe() - if err != nil { + if err != http.ErrServerClosed { panic(err) } }() @@ -179,8 +178,6 @@ func RunMetricsServer(stopCh <-chan struct{}) { go func() { <-stopCh logger.Info("Shutting down the /metrics server gracefully...") - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() if err := server.Shutdown(ctx); err != nil { logger.Error(err, "http server shutdown error") @@ -254,7 +251,7 @@ func main() { logger.Info(cmd.Message) - RunMetricsServer(stopCh) + RunMetricsServer(ctx, stopCh) if err = cmd.Run(stopCh); err != nil { return diff --git a/tests/sequential/prometheus_metrics/prometheus_metrics_test.go b/tests/sequential/prometheus_metrics/prometheus_metrics_test.go index 4bd2bb367af..f86cb1e3e1a 100644 --- a/tests/sequential/prometheus_metrics/prometheus_metrics_test.go +++ b/tests/sequential/prometheus_metrics/prometheus_metrics_test.go @@ -620,7 +620,9 @@ func testWebhookMetricValues(t *testing.T) { } func testMetricServerMetrics(t *testing.T) { - _ = fetchAndParsePrometheusMetrics(t, fmt.Sprintf("curl --insecure %s", kedaMetricsServerPrometheusURL)) + families := fetchAndParsePrometheusMetrics(t, fmt.Sprintf("curl --insecure %s", kedaMetricsServerPrometheusURL)) + checkMetricServerValues(t, families) + checkBuildInfo(t, families) } func testOperatorMetricValues(t *testing.T, kc *kubernetes.Clientset) { @@ -768,3 +770,38 @@ func checkWebhookValues(t *testing.T, families map[string]*prommodel.MetricFamil } assert.GreaterOrEqual(t, metricValue, 1.0, "keda_webhook_scaled_object_validation_total has to be greater than 0") } + +func checkMetricServerValues(t *testing.T, families map[string]*prommodel.MetricFamily) { + t.Log("--- testing metric server metrics ---") + + family, ok := families["workqueue_adds_total"] + if !ok { + t.Errorf("metric workqueue_adds_total not available") + return + } + + metricValue := 0.0 + metrics := family.GetMetric() + for _, metric := range metrics { + metricValue += *metric.Counter.Value + } + assert.GreaterOrEqual(t, metricValue, 1.0, "workqueue_adds_total has to be greater than 0") + + family, ok = families["apiserver_request_total"] + if !ok { + t.Errorf("metric apiserver_request_total not available") + return + } + + metricValue = 0.0 + metrics = family.GetMetric() + for _, metric := range metrics { + labels := metric.GetLabel() + for _, label := range labels { + if *label.Name == "group" && *label.Value == "external.metrics.k8s.io" { + metricValue = *metric.Counter.Value + } + } + } + assert.GreaterOrEqual(t, metricValue, 1.0, "apiserver_request_total has to be greater than 0") +} From 93a22923d462e9c52086a967abbccd4a487bf4e0 Mon Sep 17 00:00:00 2001 From: Bojan Zelic Date: Thu, 21 Sep 2023 09:29:14 -0700 Subject: [PATCH 6/6] General: metrics server expose apiserver metrics Signed-off-by: Bojan Zelic --- CHANGELOG.md | 2 +- cmd/adapter/main.go | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f470f9a4d7..39b2f8ff349 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,7 +54,7 @@ To learn more about active deprecations, we recommend checking [GitHub Discussio - **Governance**: KEDA transitioned to CNCF Graduated project ([#63](https://github.com/kedacore/governance/issues/63)) ### Improvements -- **General**: Add apiserver prometheus metrics to keda metric server ([#4460](https://github.com/kedacore/keda/issues/4460)) +- **General**: Add apiserver Prometheus metrics to KEDA Metric Server ([#4460](https://github.com/kedacore/keda/issues/4460)) - **General**: Add more events for user checking ([#796](https://github.com/kedacore/keda/issues/3764)) - **General**: Add ScaledObject/ScaledJob names to output of `kubectl get triggerauthentication/clustertriggerauthentication` ([#796](https://github.com/kedacore/keda/issues/796)) - **General**: Add standalone CRD generation to release workflow ([#2726](https://github.com/kedacore/keda/issues/2726)) diff --git a/cmd/adapter/main.go b/cmd/adapter/main.go index aed48156043..7f17bc105a1 100644 --- a/cmd/adapter/main.go +++ b/cmd/adapter/main.go @@ -155,6 +155,9 @@ func getMetricHandler() http.HandlerFunc { } // RunMetricsServer runs a http listener and handles the /metrics endpoint +// this is needed to consolidate apiserver and controller-runtime metrics +// we have to use a separate http server & can't rely on the controller-runtime implementation +// because apiserver doesn't provide a way to register metrics to other prometheus registries func RunMetricsServer(ctx context.Context, stopCh <-chan struct{}) { h := getMetricHandler() mux := http.NewServeMux()