From 16a39a62a35e7c0594ec589aead5649bffa290c9 Mon Sep 17 00:00:00 2001 From: Connor Rogers <23215294+coro@users.noreply.github.com> Date: Fri, 10 Jun 2022 15:24:16 +0100 Subject: [PATCH 1/4] Add pprof debug endpoint Allow users to opt-in to exposing pprof debugging endpoints on the operator's metrics port. Users may add the `ENABLE_DEBUG_PPROF` env variable to the operator Pod to enable the pprof endpoints. This is an opt-in pattern, as memory/CPU profiling can have a non-insignificant impact on runtime perf, and could be a denial of service vector. These endpoints should not be enabled in production. With this feature enabled, users can access the pprof output at `:/debug/pprof`. For example, after port-forwarding to the operator Pod: ``` go tool pprof -inuse_space -web http://localhost:9782/debug/pprof/heap ``` --- Dockerfile | 1 + Makefile | 2 +- main.go | 12 ++++++ pkg/profiling/pprof.go | 28 ++++++++++++++ pkg/profiling/pprof_test.go | 53 +++++++++++++++++++++++++++ pkg/profiling/profiling_suite_test.go | 48 ++++++++++++++++++++++++ 6 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 pkg/profiling/pprof.go create mode 100644 pkg/profiling/pprof_test.go create mode 100644 pkg/profiling/profiling_suite_test.go diff --git a/Dockerfile b/Dockerfile index 262b40ca5..e63318403 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,6 +13,7 @@ COPY main.go main.go COPY api/ api/ COPY controllers/ controllers/ COPY internal/ internal/ +COPY pkg/ pkg/ # Build RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -tags timetzdata -o manager main.go diff --git a/Makefile b/Makefile index d5abdb332..f7409185a 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ kubebuilder-assets: $(KUBEBUILDER_ASSETS) .PHONY: unit-tests unit-tests: install-tools $(KUBEBUILDER_ASSETS) generate fmt vet manifests ## Run unit tests - ginkgo -r --randomize-all api/ internal/ + ginkgo -r --randomize-all api/ internal/ pkg/ .PHONY: integration-tests integration-tests: install-tools $(KUBEBUILDER_ASSETS) generate fmt vet manifests ## Run integration tests diff --git a/main.go b/main.go index d5954b283..509354070 100644 --- a/main.go +++ b/main.go @@ -11,6 +11,7 @@ package main import ( "flag" "fmt" + "github.com/rabbitmq/cluster-operator/pkg/profiling" "os" "strconv" "time" @@ -113,6 +114,17 @@ func main() { os.Exit(1) } + if enableDebugPprof, ok := os.LookupEnv("ENABLE_DEBUG_PPROF"); ok { + pprofEnabled, err := strconv.ParseBool(enableDebugPprof) + if err == nil && pprofEnabled { + mgr, err = profiling.AddDebugPprofEndpoints(mgr) + if err != nil { + log.Error(err, "unable to add debug endpoints to manager") + os.Exit(1) + } + } + } + clusterConfig := config.GetConfigOrDie() err = (&controllers.RabbitmqClusterReconciler{ diff --git a/pkg/profiling/pprof.go b/pkg/profiling/pprof.go new file mode 100644 index 000000000..dc8e39d44 --- /dev/null +++ b/pkg/profiling/pprof.go @@ -0,0 +1,28 @@ +package profiling + +import ( + "net/http" + "net/http/pprof" + ctrl "sigs.k8s.io/controller-runtime" +) + +func AddDebugPprofEndpoints(mgr ctrl.Manager) (ctrl.Manager, error) { + pprofEndpoints := map[string]http.HandlerFunc{ + "/debug/pprof": http.HandlerFunc(pprof.Index), + "/debug/pprof/heap": http.HandlerFunc(pprof.Index), + "/debug/pprof/mutex": http.HandlerFunc(pprof.Index), + "/debug/pprof/block": http.HandlerFunc(pprof.Index), + "/debug/pprof/goroutine": http.HandlerFunc(pprof.Index), + "/debug/pprof/cmdline": http.HandlerFunc(pprof.Cmdline), + "/debug/pprof/profile": http.HandlerFunc(pprof.Profile), + "/debug/pprof/symbol": http.HandlerFunc(pprof.Symbol), + "/debug/pprof/trace": http.HandlerFunc(pprof.Trace), + } + for path, handler := range pprofEndpoints { + err := mgr.AddMetricsExtraHandler(path, handler) + if err != nil { + return mgr, err + } + } + return mgr, nil +} diff --git a/pkg/profiling/pprof_test.go b/pkg/profiling/pprof_test.go new file mode 100644 index 000000000..2d6119be2 --- /dev/null +++ b/pkg/profiling/pprof_test.go @@ -0,0 +1,53 @@ +package profiling_test + +import ( + "context" + "fmt" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/rabbitmq/cluster-operator/pkg/profiling" + "io" + "net/http" + ctrl "sigs.k8s.io/controller-runtime" +) + +var _ = Describe("Pprof", func() { + + var ( + opts ctrl.Options + mgr ctrl.Manager + err error + metricsEndpoint string + ) + + BeforeEach(func() { + metricsEndpoint, err = getFreePort() + opts = ctrl.Options{ + MetricsBindAddress: metricsEndpoint, + } + mgr, err = ctrl.NewManager(cfg, opts) + Expect(err).NotTo(HaveOccurred()) + mgr, err = profiling.AddDebugPprofEndpoints(mgr) + Expect(err).NotTo(HaveOccurred()) + + }) + + It("should serve extra endpoints", func() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go func() { + defer GinkgoRecover() + Expect(mgr.Start(ctx)).NotTo(HaveOccurred()) + }() + <-mgr.Elected() + endpoint := fmt.Sprintf("http://%s/debug/pprof", metricsEndpoint) + resp, err := http.Get(endpoint) + Expect(err).NotTo(HaveOccurred()) + defer resp.Body.Close() + Expect(resp.StatusCode).To(Equal(http.StatusOK)) + + body, err := io.ReadAll(resp.Body) + Expect(err).NotTo(HaveOccurred()) + Expect(string(body)).NotTo(BeEmpty()) + }) +}) diff --git a/pkg/profiling/profiling_suite_test.go b/pkg/profiling/profiling_suite_test.go new file mode 100644 index 000000000..d0fae7faf --- /dev/null +++ b/pkg/profiling/profiling_suite_test.go @@ -0,0 +1,48 @@ +package profiling_test + +import ( + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "net" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var testenv *envtest.Environment +var cfg *rest.Config +var clientset *kubernetes.Clientset + +func TestProfiling(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Profiling Suite") +} + +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + testenv = &envtest.Environment{} + + var err error + cfg, err = testenv.Start() + Expect(err).NotTo(HaveOccurred()) + +}) + +func getFreePort() (string, error) { + addr, err := net.ResolveTCPAddr("tcp", ":0") + if err != nil { + return "", err + } + + l, err := net.ListenTCP("tcp", addr) + if err != nil { + return "", err + } + defer l.Close() + return l.Addr().String(), nil +} From 604eff27c046e8c988f17f04203f119e298bd955 Mon Sep 17 00:00:00 2001 From: Connor Rogers <23215294+coro@users.noreply.github.com> Date: Mon, 13 Jun 2022 10:50:13 +0100 Subject: [PATCH 2/4] Remove warning on MacOS when running unit tests --- pkg/profiling/profiling_suite_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/profiling/profiling_suite_test.go b/pkg/profiling/profiling_suite_test.go index d0fae7faf..f61e13b1f 100644 --- a/pkg/profiling/profiling_suite_test.go +++ b/pkg/profiling/profiling_suite_test.go @@ -34,7 +34,7 @@ var _ = BeforeSuite(func() { }) func getFreePort() (string, error) { - addr, err := net.ResolveTCPAddr("tcp", ":0") + addr, err := net.ResolveTCPAddr("tcp", "127.0.0.1:0") if err != nil { return "", err } From de92cd8aaa353aaebfc1c1d2abd5b44c25669401 Mon Sep 17 00:00:00 2001 From: Connor Rogers <23215294+coro@users.noreply.github.com> Date: Mon, 13 Jun 2022 11:54:04 +0100 Subject: [PATCH 3/4] Add aftersuite to close envtest --- pkg/profiling/profiling_suite_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/profiling/profiling_suite_test.go b/pkg/profiling/profiling_suite_test.go index f61e13b1f..ab5afc94f 100644 --- a/pkg/profiling/profiling_suite_test.go +++ b/pkg/profiling/profiling_suite_test.go @@ -33,6 +33,10 @@ var _ = BeforeSuite(func() { }) +var _ = AfterSuite(func() { + Expect(testenv.Stop()).To(Succeed()) +}) + func getFreePort() (string, error) { addr, err := net.ResolveTCPAddr("tcp", "127.0.0.1:0") if err != nil { From 555505efb86a9e50169c78468382bd27dcc1ea0f Mon Sep 17 00:00:00 2001 From: Connor Rogers <23215294+coro@users.noreply.github.com> Date: Mon, 13 Jun 2022 17:29:31 +0100 Subject: [PATCH 4/4] Add missing pprof endpoints --- pkg/profiling/pprof.go | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pkg/profiling/pprof.go b/pkg/profiling/pprof.go index dc8e39d44..70e136a52 100644 --- a/pkg/profiling/pprof.go +++ b/pkg/profiling/pprof.go @@ -8,15 +8,17 @@ import ( func AddDebugPprofEndpoints(mgr ctrl.Manager) (ctrl.Manager, error) { pprofEndpoints := map[string]http.HandlerFunc{ - "/debug/pprof": http.HandlerFunc(pprof.Index), - "/debug/pprof/heap": http.HandlerFunc(pprof.Index), - "/debug/pprof/mutex": http.HandlerFunc(pprof.Index), - "/debug/pprof/block": http.HandlerFunc(pprof.Index), - "/debug/pprof/goroutine": http.HandlerFunc(pprof.Index), - "/debug/pprof/cmdline": http.HandlerFunc(pprof.Cmdline), - "/debug/pprof/profile": http.HandlerFunc(pprof.Profile), - "/debug/pprof/symbol": http.HandlerFunc(pprof.Symbol), - "/debug/pprof/trace": http.HandlerFunc(pprof.Trace), + "/debug/pprof": http.HandlerFunc(pprof.Index), + "/debug/pprof/allocs": http.HandlerFunc(pprof.Index), + "/debug/pprof/block": http.HandlerFunc(pprof.Index), + "/debug/pprof/cmdline": http.HandlerFunc(pprof.Cmdline), + "/debug/pprof/goroutine": http.HandlerFunc(pprof.Index), + "/debug/pprof/heap": http.HandlerFunc(pprof.Index), + "/debug/pprof/mutex": http.HandlerFunc(pprof.Index), + "/debug/pprof/profile": http.HandlerFunc(pprof.Profile), + "/debug/pprof/symbol": http.HandlerFunc(pprof.Symbol), + "/debug/pprof/threadcreate": http.HandlerFunc(pprof.Index), + "/debug/pprof/trace": http.HandlerFunc(pprof.Trace), } for path, handler := range pprofEndpoints { err := mgr.AddMetricsExtraHandler(path, handler)