diff --git a/constants.go b/constants.go index 494abf90ab4e6..0c3744a095698 100644 --- a/constants.go +++ b/constants.go @@ -144,6 +144,10 @@ const ( // ComponentDiagnostic is a diagnostic service ComponentDiagnostic = "diag" + // ComponentDiagnosticHealth is the health monitor used by the diagnostic + // and debug services. + ComponentDiagnosticHealth = "diag:health" + // ComponentDebug is the debug service, which exposes debugging // configuration over a Unix socket. ComponentDebug = "debug" diff --git a/lib/service/diagnostic.go b/lib/service/diagnostic.go new file mode 100644 index 0000000000000..3d912d9915e29 --- /dev/null +++ b/lib/service/diagnostic.go @@ -0,0 +1,93 @@ +// Teleport +// Copyright (C) 2025 Gravitational, Inc. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package service + +import ( + "log/slog" + "net/http" + + "github.com/gravitational/roundtrip" + "github.com/gravitational/trace" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + + "github.com/gravitational/teleport" + "github.com/gravitational/teleport/lib/srv/debug" +) + +type diagnosticHandlerConfig struct { + enableMetrics bool + enableProfiling bool + enableHealth bool + enableLogLeveler bool +} + +func (process *TeleportProcess) newDiagnosticHandler(config diagnosticHandlerConfig, logger *slog.Logger) (http.Handler, error) { + if process.state == nil { + return nil, trace.BadParameter("teleport process state machine has not yet been initialized (this is a bug)") + } + + mux := http.NewServeMux() + + if config.enableMetrics { + mux.Handle("/metrics", process.newMetricsHandler()) + } + + if config.enableProfiling { + debug.RegisterProfilingHandlers(mux, logger) + } + + if config.enableHealth { + mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) { + roundtrip.ReplyJSON(w, http.StatusOK, map[string]any{"status": "ok"}) + }) + mux.HandleFunc("/readyz", process.state.readinessHandler()) + } + + if config.enableLogLeveler { + debug.RegisterLogLevelHandlers(mux, logger, process.Config) + } + + return mux, nil +} + +// newMetricsHandler creates a new metrics handler serving metrics both from the global prometheus registry and the +// in-process one. +func (process *TeleportProcess) newMetricsHandler() http.Handler { + // We gather metrics both from the in-process registry (preferred metrics registration method) + // and the global registry (used by some Teleport services and many dependencies). + gatherers := prometheus.Gatherers{ + process.metricsRegistry, + prometheus.DefaultGatherer, + } + + metricsHandler := promhttp.InstrumentMetricHandler( + process.metricsRegistry, promhttp.HandlerFor(gatherers, promhttp.HandlerOpts{ + // Errors can happen if metrics are registered with identical names in both the local and the global registry. + // In this case, we log the error but continue collecting metrics. The first collected metric will win + // (the one from the local metrics registry takes precedence). + // As we move more things to the local registry, especially in other tools like tbot, we will have less + // conflicts in tests. + ErrorHandling: promhttp.ContinueOnError, + ErrorLog: promHTTPLogAdapter{ + ctx: process.ExitContext(), + Logger: process.logger.With(teleport.ComponentKey, teleport.ComponentMetrics), + }, + }), + ) + return metricsHandler +} diff --git a/lib/service/service.go b/lib/service/service.go index aa9ba1071d72a..012070246e18a 100644 --- a/lib/service/service.go +++ b/lib/service/service.go @@ -35,7 +35,6 @@ import ( "net/http" "net/http/httptest" "net/http/httputil" - "net/http/pprof" "os" "os/signal" "path/filepath" @@ -50,11 +49,9 @@ import ( awssession "github.com/aws/aws-sdk-go/aws/session" "github.com/google/renameio/v2" "github.com/google/uuid" - "github.com/gravitational/roundtrip" "github.com/gravitational/trace" "github.com/jonboulle/clockwork" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/sirupsen/logrus" "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "go.opentelemetry.io/otel/attribute" @@ -149,7 +146,6 @@ import ( alpncommon "github.com/gravitational/teleport/lib/srv/alpnproxy/common" "github.com/gravitational/teleport/lib/srv/app" "github.com/gravitational/teleport/lib/srv/db" - "github.com/gravitational/teleport/lib/srv/debug" "github.com/gravitational/teleport/lib/srv/desktop" "github.com/gravitational/teleport/lib/srv/ingress" "github.com/gravitational/teleport/lib/srv/regular" @@ -467,6 +463,9 @@ type TeleportProcess struct { // Both the metricsRegistry and the default global registry are gathered by // Telepeort's metric service. metricsRegistry *prometheus.Registry + + // state is the process state machine tracking if the process is healthy or not. + state *processState } type keyPairKey struct { @@ -1226,6 +1225,12 @@ func NewTeleport(cfg *servicecfg.Config) (*TeleportProcess, error) { serviceStarted := false + ps, err := process.newProcessStateMachine() + if err != nil { + return nil, trace.Wrap(err, "failed to initialize process state machine") + } + process.state = ps + if !cfg.DiagnosticAddr.IsEmpty() { if err := process.initDiagnosticService(); err != nil { return nil, trace.Wrap(err) @@ -1244,12 +1249,8 @@ func NewTeleport(cfg *servicecfg.Config) (*TeleportProcess, error) { process.initPyroscope(address) } - if cfg.DebugService.Enabled { - if err := process.initDebugService(); err != nil { - return nil, trace.Wrap(err) - } - } else { - warnOnErr(process.ExitContext(), process.closeImportedDescriptors(teleport.ComponentDebug), process.logger) + if err := process.initDebugService(cfg.DebugService.Enabled); err != nil { + return nil, trace.Wrap(err) } // Create a process wide key generator that will be shared. This is so the @@ -3303,11 +3304,16 @@ func (l promHTTPLogAdapter) Println(v ...interface{}) { // initMetricsService starts the metrics service currently serving metrics for // prometheus consumption func (process *TeleportProcess) initMetricsService() error { - mux := http.NewServeMux() - mux.Handle("/metrics", process.newMetricsHandler()) - logger := process.logger.With(teleport.ComponentKey, teleport.Component(teleport.ComponentMetrics, process.id)) + config := diagnosticHandlerConfig{ + enableMetrics: true, + } + mux, err := process.newDiagnosticHandler(config, logger) + if err != nil { + return trace.Wrap(err) + } + listener, err := process.importOrCreateListener(ListenerMetrics, process.Config.Metrics.ListenAddr.Addr) if err != nil { return trace.Wrap(err) @@ -3389,78 +3395,17 @@ func (process *TeleportProcess) initMetricsService() error { return nil } -// newMetricsHandler creates a new metrics handler serving metrics both from the global prometheus registry and the -// in-process one. -func (process *TeleportProcess) newMetricsHandler() http.Handler { - // We gather metrics both from the in-process registry (preferred metrics registration method) - // and the global registry (used by some Teleport services and many dependencies). - gatherers := prometheus.Gatherers{ - process.metricsRegistry, - prometheus.DefaultGatherer, - } - - metricsHandler := promhttp.InstrumentMetricHandler( - process.metricsRegistry, promhttp.HandlerFor(gatherers, promhttp.HandlerOpts{ - // Errors can happen if metrics are registered with identical names in both the local and the global registry. - // In this case, we log the error but continue collecting metrics. The first collected metric will win - // (the one from the local metrics registry takes precedence). - // As we move more things to the local registry, especially in other tools like tbot, we will have less - // conflicts in tests. - ErrorHandling: promhttp.ContinueOnError, - ErrorLog: promHTTPLogAdapter{ - ctx: process.ExitContext(), - Logger: process.logger.With(teleport.ComponentKey, teleport.ComponentMetrics), - }, - }), - ) - return metricsHandler -} - -// initDiagnosticService starts diagnostic service currently serving healthz -// and prometheus endpoints -func (process *TeleportProcess) initDiagnosticService() error { - mux := http.NewServeMux() - - // support legacy metrics collection in the diagnostic service. - // metrics will otherwise be served by the metrics service if it's enabled - // in the config. - if !process.Config.Metrics.Enabled { - mux.Handle("/metrics", process.newMetricsHandler()) - } - - if process.Config.Debug { - process.logger.InfoContext(process.ExitContext(), "Adding diagnostic debugging handlers. To connect with profiler, use `go tool pprof `.", "listen_address", process.Config.DiagnosticAddr.Addr) - - noWriteTimeout := func(h http.HandlerFunc) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - rc := http.NewResponseController(w) //nolint:bodyclose // bodyclose gets really confused about NewResponseController - if err := rc.SetWriteDeadline(time.Time{}); err == nil { - // don't let the pprof handlers know about the WriteTimeout - r = r.WithContext(context.WithValue(r.Context(), http.ServerContextKey, nil)) - } - h(w, r) - } - } - - mux.HandleFunc("/debug/pprof/", noWriteTimeout(pprof.Index)) - mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) - mux.HandleFunc("/debug/pprof/profile", noWriteTimeout(pprof.Profile)) - mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) - mux.HandleFunc("/debug/pprof/trace", noWriteTimeout(pprof.Trace)) - } - - mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) { - roundtrip.ReplyJSON(w, http.StatusOK, map[string]interface{}{"status": "ok"}) - }) - - logger := process.logger.With(teleport.ComponentKey, teleport.Component(teleport.ComponentDiagnostic, process.id)) - +// newProcessStateMachine creates a state machine tracking the Teleport process +// state. The state machine is then used by the diagnostics or the debug service +// to evaluate the process health. +func (process *TeleportProcess) newProcessStateMachine() (*processState, error) { + logger := process.logger.With(teleport.ComponentKey, teleport.Component(teleport.ComponentDiagnosticHealth, process.id)) // Create a state machine that will process and update the internal state of - // Teleport based off Events. Use this state machine to return return the + // Teleport based off Events. Use this state machine to return the // status from the /readyz endpoint. ps, err := newProcessState(process) if err != nil { - return trace.Wrap(err) + return nil, trace.Wrap(err) } process.RegisterFunc("readyz.monitor", func() error { @@ -3482,29 +3427,27 @@ func (process *TeleportProcess) initDiagnosticService() error { } } }) - mux.HandleFunc("/readyz", func(w http.ResponseWriter, r *http.Request) { - switch ps.getState() { - // 503 - case stateDegraded: - roundtrip.ReplyJSON(w, http.StatusServiceUnavailable, map[string]interface{}{ - "status": "teleport is in a degraded state, check logs for details", - }) - // 400 - case stateRecovering: - roundtrip.ReplyJSON(w, http.StatusBadRequest, map[string]interface{}{ - "status": "teleport is recovering from a degraded state, check logs for details", - }) - case stateStarting: - roundtrip.ReplyJSON(w, http.StatusBadRequest, map[string]interface{}{ - "status": "teleport is starting and hasn't joined the cluster yet", - }) - // 200 - case stateOK: - roundtrip.ReplyJSON(w, http.StatusOK, map[string]interface{}{ - "status": "ok", - }) - } - }) + return ps, nil +} + +// initDiagnosticService starts diagnostic service currently serving healthz +// and prometheus endpoints +func (process *TeleportProcess) initDiagnosticService() error { + logger := process.logger.With(teleport.ComponentKey, teleport.Component(teleport.ComponentDiagnostic, process.id)) + + config := diagnosticHandlerConfig{ + // support legacy metrics collection in the diagnostic service. + // metrics will otherwise be served by the metrics service if it's enabled + // in the config. + enableMetrics: !process.Config.Metrics.Enabled, + enableProfiling: process.Config.Debug, + enableHealth: true, + enableLogLeveler: false, + } + mux, err := process.newDiagnosticHandler(config, logger) + if err != nil { + return trace.Wrap(err) + } listener, err := process.importOrCreateListener(ListenerDiagnostic, process.Config.DiagnosticAddr.Addr) if err != nil { @@ -3564,17 +3507,48 @@ func (process *TeleportProcess) initDiagnosticService() error { } // initDebugService starts debug service serving endpoints used for -// troubleshooting the instance. -func (process *TeleportProcess) initDebugService() error { +// troubleshooting the instance. This service is always active, users can +// disable its sensitive pprof and log-setting endpoints, but the liveness +// and readiness ones are always active. +func (process *TeleportProcess) initDebugService(exposeDebugRoutes bool) error { + if process.state == nil { + return trace.BadParameter("teleport process state machine has not yet been initialized (this is a bug)") + } + logger := process.logger.With(teleport.ComponentKey, teleport.Component(teleport.ComponentDebug, process.id)) + // Unix socket creation can fail on paths too long. Depending on the UNIX implementation, + // socket paths cannot exceed 104 or 108 chars. listener, err := process.importOrCreateListener(ListenerDebug, filepath.Join(process.Config.DataDir, teleport.DebugServiceSocketName)) + if err != nil { + if exposeDebugRoutes { + // If the debug service is enabled in the config, this is a hard failure + return trace.Wrap(err) + } else { + // If the debug service was disabled in the config, we issue a warning and will have to continue. + logger.WarnContext(process.ExitContext(), + "Failed to open the debug socket. teleport-update will not be able to accurately check Teleport health.", + "error", err, + ) + return nil + } + } + + // Users can disable the debug service for compliance reasons but not the health + // routes because the updater relies on them. + config := diagnosticHandlerConfig{ + enableMetrics: exposeDebugRoutes, + enableProfiling: exposeDebugRoutes, + enableHealth: true, + enableLogLeveler: exposeDebugRoutes, + } + mux, err := process.newDiagnosticHandler(config, logger) if err != nil { return trace.Wrap(err) } server := &http.Server{ - Handler: debug.NewServeMux(logger, process.Config), + Handler: mux, ReadTimeout: apidefaults.DefaultIOTimeout, ReadHeaderTimeout: defaults.ReadHeadersTimeout, WriteTimeout: apidefaults.DefaultIOTimeout, diff --git a/lib/service/service_test.go b/lib/service/service_test.go index 7295f3ade8229..8d1c63de2d90b 100644 --- a/lib/service/service_test.go +++ b/lib/service/service_test.go @@ -1612,8 +1612,8 @@ func TestSingleProcessModeResolver(t *testing.T) { } // TestDebugServiceStartSocket ensures the debug service socket starts -// correctly, and is accessible. -func TestDebugServiceStartSocket(t *testing.T) { +// correctly, is accessible, and exposes the healthcheck endpoints. +func TestDebugService(t *testing.T) { t.Parallel() fakeClock := clockwork.NewFakeClock() @@ -1625,6 +1625,8 @@ func TestDebugServiceStartSocket(t *testing.T) { cfg.DiagnosticAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"} cfg.SetAuthServerAddress(utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"}) cfg.Auth.Enabled = true + cfg.Proxy.Enabled = false + cfg.Auth.StorageConfig.Params["path"] = dataDir cfg.Auth.ListenAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"} cfg.SSH.Enabled = false cfg.CircuitBreakerConfig = breaker.NoopBreakerConfig() @@ -1635,6 +1637,11 @@ func TestDebugServiceStartSocket(t *testing.T) { require.NoError(t, process.Start()) t.Cleanup(func() { require.NoError(t, process.Close()) }) + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + t.Cleanup(cancel) + _, err = process.WaitForEvent(ctx, TeleportOKEvent) + require.NoError(t, err) + httpClient := &http.Client{ Timeout: 10 * time.Second, Transport: &http.Transport{ @@ -1644,11 +1651,52 @@ func TestDebugServiceStartSocket(t *testing.T) { }, } + // Testing the debug listener. // Fetch a random path, it should return 404 error. req, err := httpClient.Get("http://debug/random") require.NoError(t, err) defer req.Body.Close() require.Equal(t, http.StatusNotFound, req.StatusCode) + + // Test the healthcheck endpoints. + // Fetch the liveness path + req, err = httpClient.Get("http://debug/healthz") + require.NoError(t, err) + defer req.Body.Close() + require.Equal(t, http.StatusOK, req.StatusCode) + + // Fetch the readiness path + req, err = httpClient.Get("http://debug/readyz") + require.NoError(t, err) + defer req.Body.Close() + require.Equal(t, http.StatusOK, req.StatusCode) + + // Testing the metrics endpoint. + // Test setup: create our test metrics. + nonce := strings.ReplaceAll(uuid.NewString(), "-", "") + localMetric := prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "test", + Name: "local_metric_" + nonce, + }) + globalMetric := prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "test", + Name: "global_metric_" + nonce, + }) + require.NoError(t, process.metricsRegistry.Register(localMetric)) + require.NoError(t, prometheus.Register(globalMetric)) + + // Test execution: hit the metrics endpoint. + resp, err := httpClient.Get("http://debug/metrics") + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + require.NoError(t, resp.Body.Close()) + + // Test validation: check that the metrics server served both the local and global registry. + require.Contains(t, string(body), "local_metric_"+nonce) + require.Contains(t, string(body), "global_metric_"+nonce) } type mockInstanceMetadata struct { @@ -1858,7 +1906,7 @@ func TestInitDatabaseService(t *testing.T) { // TestMetricsService tests that the optional metrics service exposes // metrics from both the in-process and global metrics registry. When the // service is disabled, metrics are served by the diagnostics service -// (tested in TestMetricsInDiagnosticsService). +// (tested in TestDiagnosticsService). func TestMetricsService(t *testing.T) { t.Parallel() // Test setup: create a listener for the metrics server, get its file descriptor. @@ -1937,10 +1985,11 @@ func TestMetricsService(t *testing.T) { require.Contains(t, string(body), "global_metric_"+nonce) } -// TestMetricsInDiagnosticsService tests that the diagnostics service exposes +// TestDiagnosticsService tests that the diagnostics service exposes // metrics from both the in-process and global metrics registry when the metrics -// service is disabled. -func TestMetricsInDiagnosticsService(t *testing.T) { +// service is disabled. It also checks that the diagnostics service exposes the +// health routes. +func TestDiagnosticsService(t *testing.T) { t.Parallel() // Test setup: create a new teleport process dataDir := makeTempDir(t) @@ -1979,7 +2028,7 @@ func TestMetricsInDiagnosticsService(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) t.Cleanup(cancel) - _, err = process.WaitForEvent(ctx, TeleportReadyEvent) + _, err = process.WaitForEvent(ctx, TeleportOKEvent) require.NoError(t, err) // Test execution: query the metrics endpoint and check the tests metrics are here. @@ -1999,6 +2048,24 @@ func TestMetricsInDiagnosticsService(t *testing.T) { // Test validation: check that the metrics server served both the local and global registry. require.Contains(t, string(body), "local_metric_"+nonce) require.Contains(t, string(body), "global_metric_"+nonce) + + // Fetch the liveness endpoint + healthURL, err := url.Parse("http://" + diagAddr.String()) + require.NoError(t, err) + healthURL.Path = "/healthz" + resp, err = http.Get(healthURL.String()) + require.NoError(t, err) + require.NoError(t, resp.Body.Close()) + require.Equal(t, http.StatusOK, resp.StatusCode) + + // Fetch the readiness endpoint + readinessURL, err := url.Parse("http://" + diagAddr.String()) + require.NoError(t, err) + readinessURL.Path = "/readyz" + resp, err = http.Get(readinessURL.String()) + require.NoError(t, err) + require.NoError(t, resp.Body.Close()) + require.Equal(t, http.StatusOK, resp.StatusCode) } // makeTempDir makes a temp dir with a shorter name than t.TempDir() in order to diff --git a/lib/service/state.go b/lib/service/state.go index ad76112391960..3c057148683c6 100644 --- a/lib/service/state.go +++ b/lib/service/state.go @@ -20,9 +20,11 @@ package service import ( "fmt" + "net/http" "sync" "time" + "github.com/gravitational/roundtrip" "github.com/gravitational/trace" "github.com/prometheus/client_golang/prometheus" @@ -171,3 +173,29 @@ func (f *processState) getState() componentStateEnum { defer f.mu.Unlock() return f.getStateLocked() } + +func (f *processState) readinessHandler() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + switch f.getState() { + // 503 + case stateDegraded: + roundtrip.ReplyJSON(w, http.StatusServiceUnavailable, map[string]any{ + "status": "teleport is in a degraded state, check logs for details", + }) + // 400 + case stateRecovering: + roundtrip.ReplyJSON(w, http.StatusBadRequest, map[string]any{ + "status": "teleport is recovering from a degraded state, check logs for details", + }) + case stateStarting: + roundtrip.ReplyJSON(w, http.StatusBadRequest, map[string]any{ + "status": "teleport is starting and hasn't joined the cluster yet", + }) + // 200 + case stateOK: + roundtrip.ReplyJSON(w, http.StatusOK, map[string]any{ + "status": "ok", + }) + } + } +} diff --git a/lib/srv/debug/handlers.go b/lib/srv/debug/handlers.go index 8de5ac6fdeb4e..c8b7807b0e692 100644 --- a/lib/srv/debug/handlers.go +++ b/lib/srv/debug/handlers.go @@ -17,6 +17,7 @@ package debug import ( + "context" "fmt" "io" "log/slog" @@ -24,6 +25,7 @@ import ( "net/http/pprof" "slices" "strings" + "time" "github.com/gravitational/trace" @@ -38,19 +40,34 @@ type LogLeveler interface { SetLogLevel(slog.Level) } -// NewServeMux returns a http mux that handles all the debug service endpoints. -func NewServeMux(logger *slog.Logger, leveler LogLeveler) *http.ServeMux { - mux := http.NewServeMux() +// RegisterProfilingHandlers registers the debug profiling handlers (/debug/pprof/*) +// to a given multiplexer. +func RegisterProfilingHandlers(mux *http.ServeMux, logger *slog.Logger) { + noWriteTimeout := func(h http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + rc := http.NewResponseController(w) //nolint:bodyclose // bodyclose gets really confused about NewResponseController + if err := rc.SetWriteDeadline(time.Time{}); err == nil { + // don't let the pprof handlers know about the WriteTimeout + r = r.WithContext(context.WithValue(r.Context(), http.ServerContextKey, nil)) + } + h(w, r) + } + } + mux.HandleFunc("/debug/pprof/cmdline", pprofMiddleware(logger, "cmdline", pprof.Cmdline)) - mux.HandleFunc("/debug/pprof/profile", pprofMiddleware(logger, "profile", pprof.Profile)) + mux.HandleFunc("/debug/pprof/profile", pprofMiddleware(logger, "profile", noWriteTimeout(pprof.Profile))) mux.HandleFunc("/debug/pprof/symbol", pprofMiddleware(logger, "symbol", pprof.Symbol)) - mux.HandleFunc("/debug/pprof/trace", pprofMiddleware(logger, "trace", pprof.Trace)) + mux.HandleFunc("/debug/pprof/trace", pprofMiddleware(logger, "trace", noWriteTimeout(pprof.Trace))) mux.HandleFunc("/debug/pprof/{profile}", func(w http.ResponseWriter, r *http.Request) { - pprofMiddleware(logger, r.PathValue("profile"), pprof.Index)(w, r) + pprofMiddleware(logger, r.PathValue("profile"), noWriteTimeout(pprof.Index))(w, r) }) +} + +// RegisterLogLevelHandlers registers log level handlers to a given multiplexer. +// This allows to dynamically change the process' log level. +func RegisterLogLevelHandlers(mux *http.ServeMux, logger *slog.Logger, leveler LogLeveler) { mux.Handle("GET /log-level", handleGetLog(logger, leveler)) mux.Handle("PUT /log-level", handleSetLog(logger, leveler)) - return mux } // handleGetLog returns the http get log level handler. diff --git a/lib/srv/debug/handlers_test.go b/lib/srv/debug/handlers_test.go index 6c20b67f03de7..b9224fc9f3c1f 100644 --- a/lib/srv/debug/handlers_test.go +++ b/lib/srv/debug/handlers_test.go @@ -95,8 +95,15 @@ func TestCollectProfiles(t *testing.T) { func makeServer(t *testing.T) (*mockLeveler, *httptest.Server) { leveler := &mockLeveler{} - ts := httptest.NewServer(NewServeMux(slog.New(logutils.NewSlogTextHandler(io.Discard, logutils.SlogTextHandlerConfig{})), leveler)) + logger := slog.New(logutils.NewSlogTextHandler(io.Discard, logutils.SlogTextHandlerConfig{})) + + mux := http.NewServeMux() + RegisterProfilingHandlers(mux, logger) + RegisterLogLevelHandlers(mux, logger, leveler) + + ts := httptest.NewServer(mux) t.Cleanup(func() { ts.Close() }) + return leveler, ts }