diff --git a/constants.go b/constants.go
index 494abf90ab4e6..0c3744a095698 100644
--- a/constants.go
+++ b/constants.go
@@ -144,6 +144,10 @@ const (
// ComponentDiagnostic is a diagnostic service
ComponentDiagnostic = "diag"
+ // ComponentDiagnosticHealth is the health monitor used by the diagnostic
+ // and debug services.
+ ComponentDiagnosticHealth = "diag:health"
+
// ComponentDebug is the debug service, which exposes debugging
// configuration over a Unix socket.
ComponentDebug = "debug"
diff --git a/lib/service/diagnostic.go b/lib/service/diagnostic.go
new file mode 100644
index 0000000000000..3d912d9915e29
--- /dev/null
+++ b/lib/service/diagnostic.go
@@ -0,0 +1,93 @@
+// Teleport
+// Copyright (C) 2025 Gravitational, Inc.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+package service
+
+import (
+ "log/slog"
+ "net/http"
+
+ "github.com/gravitational/roundtrip"
+ "github.com/gravitational/trace"
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/client_golang/prometheus/promhttp"
+
+ "github.com/gravitational/teleport"
+ "github.com/gravitational/teleport/lib/srv/debug"
+)
+
+type diagnosticHandlerConfig struct {
+ enableMetrics bool
+ enableProfiling bool
+ enableHealth bool
+ enableLogLeveler bool
+}
+
+func (process *TeleportProcess) newDiagnosticHandler(config diagnosticHandlerConfig, logger *slog.Logger) (http.Handler, error) {
+ if process.state == nil {
+ return nil, trace.BadParameter("teleport process state machine has not yet been initialized (this is a bug)")
+ }
+
+ mux := http.NewServeMux()
+
+ if config.enableMetrics {
+ mux.Handle("/metrics", process.newMetricsHandler())
+ }
+
+ if config.enableProfiling {
+ debug.RegisterProfilingHandlers(mux, logger)
+ }
+
+ if config.enableHealth {
+ mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
+ roundtrip.ReplyJSON(w, http.StatusOK, map[string]any{"status": "ok"})
+ })
+ mux.HandleFunc("/readyz", process.state.readinessHandler())
+ }
+
+ if config.enableLogLeveler {
+ debug.RegisterLogLevelHandlers(mux, logger, process.Config)
+ }
+
+ return mux, nil
+}
+
+// newMetricsHandler creates a new metrics handler serving metrics both from the global prometheus registry and the
+// in-process one.
+func (process *TeleportProcess) newMetricsHandler() http.Handler {
+ // We gather metrics both from the in-process registry (preferred metrics registration method)
+ // and the global registry (used by some Teleport services and many dependencies).
+ gatherers := prometheus.Gatherers{
+ process.metricsRegistry,
+ prometheus.DefaultGatherer,
+ }
+
+ metricsHandler := promhttp.InstrumentMetricHandler(
+ process.metricsRegistry, promhttp.HandlerFor(gatherers, promhttp.HandlerOpts{
+ // Errors can happen if metrics are registered with identical names in both the local and the global registry.
+ // In this case, we log the error but continue collecting metrics. The first collected metric will win
+ // (the one from the local metrics registry takes precedence).
+ // As we move more things to the local registry, especially in other tools like tbot, we will have less
+ // conflicts in tests.
+ ErrorHandling: promhttp.ContinueOnError,
+ ErrorLog: promHTTPLogAdapter{
+ ctx: process.ExitContext(),
+ Logger: process.logger.With(teleport.ComponentKey, teleport.ComponentMetrics),
+ },
+ }),
+ )
+ return metricsHandler
+}
diff --git a/lib/service/service.go b/lib/service/service.go
index aa9ba1071d72a..012070246e18a 100644
--- a/lib/service/service.go
+++ b/lib/service/service.go
@@ -35,7 +35,6 @@ import (
"net/http"
"net/http/httptest"
"net/http/httputil"
- "net/http/pprof"
"os"
"os/signal"
"path/filepath"
@@ -50,11 +49,9 @@ import (
awssession "github.com/aws/aws-sdk-go/aws/session"
"github.com/google/renameio/v2"
"github.com/google/uuid"
- "github.com/gravitational/roundtrip"
"github.com/gravitational/trace"
"github.com/jonboulle/clockwork"
"github.com/prometheus/client_golang/prometheus"
- "github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/sirupsen/logrus"
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
"go.opentelemetry.io/otel/attribute"
@@ -149,7 +146,6 @@ import (
alpncommon "github.com/gravitational/teleport/lib/srv/alpnproxy/common"
"github.com/gravitational/teleport/lib/srv/app"
"github.com/gravitational/teleport/lib/srv/db"
- "github.com/gravitational/teleport/lib/srv/debug"
"github.com/gravitational/teleport/lib/srv/desktop"
"github.com/gravitational/teleport/lib/srv/ingress"
"github.com/gravitational/teleport/lib/srv/regular"
@@ -467,6 +463,9 @@ type TeleportProcess struct {
// Both the metricsRegistry and the default global registry are gathered by
// Telepeort's metric service.
metricsRegistry *prometheus.Registry
+
+ // state is the process state machine tracking if the process is healthy or not.
+ state *processState
}
type keyPairKey struct {
@@ -1226,6 +1225,12 @@ func NewTeleport(cfg *servicecfg.Config) (*TeleportProcess, error) {
serviceStarted := false
+ ps, err := process.newProcessStateMachine()
+ if err != nil {
+ return nil, trace.Wrap(err, "failed to initialize process state machine")
+ }
+ process.state = ps
+
if !cfg.DiagnosticAddr.IsEmpty() {
if err := process.initDiagnosticService(); err != nil {
return nil, trace.Wrap(err)
@@ -1244,12 +1249,8 @@ func NewTeleport(cfg *servicecfg.Config) (*TeleportProcess, error) {
process.initPyroscope(address)
}
- if cfg.DebugService.Enabled {
- if err := process.initDebugService(); err != nil {
- return nil, trace.Wrap(err)
- }
- } else {
- warnOnErr(process.ExitContext(), process.closeImportedDescriptors(teleport.ComponentDebug), process.logger)
+ if err := process.initDebugService(cfg.DebugService.Enabled); err != nil {
+ return nil, trace.Wrap(err)
}
// Create a process wide key generator that will be shared. This is so the
@@ -3303,11 +3304,16 @@ func (l promHTTPLogAdapter) Println(v ...interface{}) {
// initMetricsService starts the metrics service currently serving metrics for
// prometheus consumption
func (process *TeleportProcess) initMetricsService() error {
- mux := http.NewServeMux()
- mux.Handle("/metrics", process.newMetricsHandler())
-
logger := process.logger.With(teleport.ComponentKey, teleport.Component(teleport.ComponentMetrics, process.id))
+ config := diagnosticHandlerConfig{
+ enableMetrics: true,
+ }
+ mux, err := process.newDiagnosticHandler(config, logger)
+ if err != nil {
+ return trace.Wrap(err)
+ }
+
listener, err := process.importOrCreateListener(ListenerMetrics, process.Config.Metrics.ListenAddr.Addr)
if err != nil {
return trace.Wrap(err)
@@ -3389,78 +3395,17 @@ func (process *TeleportProcess) initMetricsService() error {
return nil
}
-// newMetricsHandler creates a new metrics handler serving metrics both from the global prometheus registry and the
-// in-process one.
-func (process *TeleportProcess) newMetricsHandler() http.Handler {
- // We gather metrics both from the in-process registry (preferred metrics registration method)
- // and the global registry (used by some Teleport services and many dependencies).
- gatherers := prometheus.Gatherers{
- process.metricsRegistry,
- prometheus.DefaultGatherer,
- }
-
- metricsHandler := promhttp.InstrumentMetricHandler(
- process.metricsRegistry, promhttp.HandlerFor(gatherers, promhttp.HandlerOpts{
- // Errors can happen if metrics are registered with identical names in both the local and the global registry.
- // In this case, we log the error but continue collecting metrics. The first collected metric will win
- // (the one from the local metrics registry takes precedence).
- // As we move more things to the local registry, especially in other tools like tbot, we will have less
- // conflicts in tests.
- ErrorHandling: promhttp.ContinueOnError,
- ErrorLog: promHTTPLogAdapter{
- ctx: process.ExitContext(),
- Logger: process.logger.With(teleport.ComponentKey, teleport.ComponentMetrics),
- },
- }),
- )
- return metricsHandler
-}
-
-// initDiagnosticService starts diagnostic service currently serving healthz
-// and prometheus endpoints
-func (process *TeleportProcess) initDiagnosticService() error {
- mux := http.NewServeMux()
-
- // support legacy metrics collection in the diagnostic service.
- // metrics will otherwise be served by the metrics service if it's enabled
- // in the config.
- if !process.Config.Metrics.Enabled {
- mux.Handle("/metrics", process.newMetricsHandler())
- }
-
- if process.Config.Debug {
- process.logger.InfoContext(process.ExitContext(), "Adding diagnostic debugging handlers. To connect with profiler, use `go tool pprof `.", "listen_address", process.Config.DiagnosticAddr.Addr)
-
- noWriteTimeout := func(h http.HandlerFunc) http.HandlerFunc {
- return func(w http.ResponseWriter, r *http.Request) {
- rc := http.NewResponseController(w) //nolint:bodyclose // bodyclose gets really confused about NewResponseController
- if err := rc.SetWriteDeadline(time.Time{}); err == nil {
- // don't let the pprof handlers know about the WriteTimeout
- r = r.WithContext(context.WithValue(r.Context(), http.ServerContextKey, nil))
- }
- h(w, r)
- }
- }
-
- mux.HandleFunc("/debug/pprof/", noWriteTimeout(pprof.Index))
- mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
- mux.HandleFunc("/debug/pprof/profile", noWriteTimeout(pprof.Profile))
- mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
- mux.HandleFunc("/debug/pprof/trace", noWriteTimeout(pprof.Trace))
- }
-
- mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
- roundtrip.ReplyJSON(w, http.StatusOK, map[string]interface{}{"status": "ok"})
- })
-
- logger := process.logger.With(teleport.ComponentKey, teleport.Component(teleport.ComponentDiagnostic, process.id))
-
+// newProcessStateMachine creates a state machine tracking the Teleport process
+// state. The state machine is then used by the diagnostics or the debug service
+// to evaluate the process health.
+func (process *TeleportProcess) newProcessStateMachine() (*processState, error) {
+ logger := process.logger.With(teleport.ComponentKey, teleport.Component(teleport.ComponentDiagnosticHealth, process.id))
// Create a state machine that will process and update the internal state of
- // Teleport based off Events. Use this state machine to return return the
+ // Teleport based off Events. Use this state machine to return the
// status from the /readyz endpoint.
ps, err := newProcessState(process)
if err != nil {
- return trace.Wrap(err)
+ return nil, trace.Wrap(err)
}
process.RegisterFunc("readyz.monitor", func() error {
@@ -3482,29 +3427,27 @@ func (process *TeleportProcess) initDiagnosticService() error {
}
}
})
- mux.HandleFunc("/readyz", func(w http.ResponseWriter, r *http.Request) {
- switch ps.getState() {
- // 503
- case stateDegraded:
- roundtrip.ReplyJSON(w, http.StatusServiceUnavailable, map[string]interface{}{
- "status": "teleport is in a degraded state, check logs for details",
- })
- // 400
- case stateRecovering:
- roundtrip.ReplyJSON(w, http.StatusBadRequest, map[string]interface{}{
- "status": "teleport is recovering from a degraded state, check logs for details",
- })
- case stateStarting:
- roundtrip.ReplyJSON(w, http.StatusBadRequest, map[string]interface{}{
- "status": "teleport is starting and hasn't joined the cluster yet",
- })
- // 200
- case stateOK:
- roundtrip.ReplyJSON(w, http.StatusOK, map[string]interface{}{
- "status": "ok",
- })
- }
- })
+ return ps, nil
+}
+
+// initDiagnosticService starts diagnostic service currently serving healthz
+// and prometheus endpoints
+func (process *TeleportProcess) initDiagnosticService() error {
+ logger := process.logger.With(teleport.ComponentKey, teleport.Component(teleport.ComponentDiagnostic, process.id))
+
+ config := diagnosticHandlerConfig{
+ // support legacy metrics collection in the diagnostic service.
+ // metrics will otherwise be served by the metrics service if it's enabled
+ // in the config.
+ enableMetrics: !process.Config.Metrics.Enabled,
+ enableProfiling: process.Config.Debug,
+ enableHealth: true,
+ enableLogLeveler: false,
+ }
+ mux, err := process.newDiagnosticHandler(config, logger)
+ if err != nil {
+ return trace.Wrap(err)
+ }
listener, err := process.importOrCreateListener(ListenerDiagnostic, process.Config.DiagnosticAddr.Addr)
if err != nil {
@@ -3564,17 +3507,48 @@ func (process *TeleportProcess) initDiagnosticService() error {
}
// initDebugService starts debug service serving endpoints used for
-// troubleshooting the instance.
-func (process *TeleportProcess) initDebugService() error {
+// troubleshooting the instance. This service is always active, users can
+// disable its sensitive pprof and log-setting endpoints, but the liveness
+// and readiness ones are always active.
+func (process *TeleportProcess) initDebugService(exposeDebugRoutes bool) error {
+ if process.state == nil {
+ return trace.BadParameter("teleport process state machine has not yet been initialized (this is a bug)")
+ }
+
logger := process.logger.With(teleport.ComponentKey, teleport.Component(teleport.ComponentDebug, process.id))
+ // Unix socket creation can fail on paths too long. Depending on the UNIX implementation,
+ // socket paths cannot exceed 104 or 108 chars.
listener, err := process.importOrCreateListener(ListenerDebug, filepath.Join(process.Config.DataDir, teleport.DebugServiceSocketName))
+ if err != nil {
+ if exposeDebugRoutes {
+ // If the debug service is enabled in the config, this is a hard failure
+ return trace.Wrap(err)
+ } else {
+ // If the debug service was disabled in the config, we issue a warning and will have to continue.
+ logger.WarnContext(process.ExitContext(),
+ "Failed to open the debug socket. teleport-update will not be able to accurately check Teleport health.",
+ "error", err,
+ )
+ return nil
+ }
+ }
+
+ // Users can disable the debug service for compliance reasons but not the health
+ // routes because the updater relies on them.
+ config := diagnosticHandlerConfig{
+ enableMetrics: exposeDebugRoutes,
+ enableProfiling: exposeDebugRoutes,
+ enableHealth: true,
+ enableLogLeveler: exposeDebugRoutes,
+ }
+ mux, err := process.newDiagnosticHandler(config, logger)
if err != nil {
return trace.Wrap(err)
}
server := &http.Server{
- Handler: debug.NewServeMux(logger, process.Config),
+ Handler: mux,
ReadTimeout: apidefaults.DefaultIOTimeout,
ReadHeaderTimeout: defaults.ReadHeadersTimeout,
WriteTimeout: apidefaults.DefaultIOTimeout,
diff --git a/lib/service/service_test.go b/lib/service/service_test.go
index 7295f3ade8229..8d1c63de2d90b 100644
--- a/lib/service/service_test.go
+++ b/lib/service/service_test.go
@@ -1612,8 +1612,8 @@ func TestSingleProcessModeResolver(t *testing.T) {
}
// TestDebugServiceStartSocket ensures the debug service socket starts
-// correctly, and is accessible.
-func TestDebugServiceStartSocket(t *testing.T) {
+// correctly, is accessible, and exposes the healthcheck endpoints.
+func TestDebugService(t *testing.T) {
t.Parallel()
fakeClock := clockwork.NewFakeClock()
@@ -1625,6 +1625,8 @@ func TestDebugServiceStartSocket(t *testing.T) {
cfg.DiagnosticAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"}
cfg.SetAuthServerAddress(utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"})
cfg.Auth.Enabled = true
+ cfg.Proxy.Enabled = false
+ cfg.Auth.StorageConfig.Params["path"] = dataDir
cfg.Auth.ListenAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"}
cfg.SSH.Enabled = false
cfg.CircuitBreakerConfig = breaker.NoopBreakerConfig()
@@ -1635,6 +1637,11 @@ func TestDebugServiceStartSocket(t *testing.T) {
require.NoError(t, process.Start())
t.Cleanup(func() { require.NoError(t, process.Close()) })
+ ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
+ t.Cleanup(cancel)
+ _, err = process.WaitForEvent(ctx, TeleportOKEvent)
+ require.NoError(t, err)
+
httpClient := &http.Client{
Timeout: 10 * time.Second,
Transport: &http.Transport{
@@ -1644,11 +1651,52 @@ func TestDebugServiceStartSocket(t *testing.T) {
},
}
+ // Testing the debug listener.
// Fetch a random path, it should return 404 error.
req, err := httpClient.Get("http://debug/random")
require.NoError(t, err)
defer req.Body.Close()
require.Equal(t, http.StatusNotFound, req.StatusCode)
+
+ // Test the healthcheck endpoints.
+ // Fetch the liveness path
+ req, err = httpClient.Get("http://debug/healthz")
+ require.NoError(t, err)
+ defer req.Body.Close()
+ require.Equal(t, http.StatusOK, req.StatusCode)
+
+ // Fetch the readiness path
+ req, err = httpClient.Get("http://debug/readyz")
+ require.NoError(t, err)
+ defer req.Body.Close()
+ require.Equal(t, http.StatusOK, req.StatusCode)
+
+ // Testing the metrics endpoint.
+ // Test setup: create our test metrics.
+ nonce := strings.ReplaceAll(uuid.NewString(), "-", "")
+ localMetric := prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "test",
+ Name: "local_metric_" + nonce,
+ })
+ globalMetric := prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: "test",
+ Name: "global_metric_" + nonce,
+ })
+ require.NoError(t, process.metricsRegistry.Register(localMetric))
+ require.NoError(t, prometheus.Register(globalMetric))
+
+ // Test execution: hit the metrics endpoint.
+ resp, err := httpClient.Get("http://debug/metrics")
+ require.NoError(t, err)
+ require.Equal(t, http.StatusOK, resp.StatusCode)
+
+ body, err := io.ReadAll(resp.Body)
+ require.NoError(t, err)
+ require.NoError(t, resp.Body.Close())
+
+ // Test validation: check that the metrics server served both the local and global registry.
+ require.Contains(t, string(body), "local_metric_"+nonce)
+ require.Contains(t, string(body), "global_metric_"+nonce)
}
type mockInstanceMetadata struct {
@@ -1858,7 +1906,7 @@ func TestInitDatabaseService(t *testing.T) {
// TestMetricsService tests that the optional metrics service exposes
// metrics from both the in-process and global metrics registry. When the
// service is disabled, metrics are served by the diagnostics service
-// (tested in TestMetricsInDiagnosticsService).
+// (tested in TestDiagnosticsService).
func TestMetricsService(t *testing.T) {
t.Parallel()
// Test setup: create a listener for the metrics server, get its file descriptor.
@@ -1937,10 +1985,11 @@ func TestMetricsService(t *testing.T) {
require.Contains(t, string(body), "global_metric_"+nonce)
}
-// TestMetricsInDiagnosticsService tests that the diagnostics service exposes
+// TestDiagnosticsService tests that the diagnostics service exposes
// metrics from both the in-process and global metrics registry when the metrics
-// service is disabled.
-func TestMetricsInDiagnosticsService(t *testing.T) {
+// service is disabled. It also checks that the diagnostics service exposes the
+// health routes.
+func TestDiagnosticsService(t *testing.T) {
t.Parallel()
// Test setup: create a new teleport process
dataDir := makeTempDir(t)
@@ -1979,7 +2028,7 @@ func TestMetricsInDiagnosticsService(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
t.Cleanup(cancel)
- _, err = process.WaitForEvent(ctx, TeleportReadyEvent)
+ _, err = process.WaitForEvent(ctx, TeleportOKEvent)
require.NoError(t, err)
// Test execution: query the metrics endpoint and check the tests metrics are here.
@@ -1999,6 +2048,24 @@ func TestMetricsInDiagnosticsService(t *testing.T) {
// Test validation: check that the metrics server served both the local and global registry.
require.Contains(t, string(body), "local_metric_"+nonce)
require.Contains(t, string(body), "global_metric_"+nonce)
+
+ // Fetch the liveness endpoint
+ healthURL, err := url.Parse("http://" + diagAddr.String())
+ require.NoError(t, err)
+ healthURL.Path = "/healthz"
+ resp, err = http.Get(healthURL.String())
+ require.NoError(t, err)
+ require.NoError(t, resp.Body.Close())
+ require.Equal(t, http.StatusOK, resp.StatusCode)
+
+ // Fetch the readiness endpoint
+ readinessURL, err := url.Parse("http://" + diagAddr.String())
+ require.NoError(t, err)
+ readinessURL.Path = "/readyz"
+ resp, err = http.Get(readinessURL.String())
+ require.NoError(t, err)
+ require.NoError(t, resp.Body.Close())
+ require.Equal(t, http.StatusOK, resp.StatusCode)
}
// makeTempDir makes a temp dir with a shorter name than t.TempDir() in order to
diff --git a/lib/service/state.go b/lib/service/state.go
index ad76112391960..3c057148683c6 100644
--- a/lib/service/state.go
+++ b/lib/service/state.go
@@ -20,9 +20,11 @@ package service
import (
"fmt"
+ "net/http"
"sync"
"time"
+ "github.com/gravitational/roundtrip"
"github.com/gravitational/trace"
"github.com/prometheus/client_golang/prometheus"
@@ -171,3 +173,29 @@ func (f *processState) getState() componentStateEnum {
defer f.mu.Unlock()
return f.getStateLocked()
}
+
+func (f *processState) readinessHandler() http.HandlerFunc {
+ return func(w http.ResponseWriter, r *http.Request) {
+ switch f.getState() {
+ // 503
+ case stateDegraded:
+ roundtrip.ReplyJSON(w, http.StatusServiceUnavailable, map[string]any{
+ "status": "teleport is in a degraded state, check logs for details",
+ })
+ // 400
+ case stateRecovering:
+ roundtrip.ReplyJSON(w, http.StatusBadRequest, map[string]any{
+ "status": "teleport is recovering from a degraded state, check logs for details",
+ })
+ case stateStarting:
+ roundtrip.ReplyJSON(w, http.StatusBadRequest, map[string]any{
+ "status": "teleport is starting and hasn't joined the cluster yet",
+ })
+ // 200
+ case stateOK:
+ roundtrip.ReplyJSON(w, http.StatusOK, map[string]any{
+ "status": "ok",
+ })
+ }
+ }
+}
diff --git a/lib/srv/debug/handlers.go b/lib/srv/debug/handlers.go
index 8de5ac6fdeb4e..c8b7807b0e692 100644
--- a/lib/srv/debug/handlers.go
+++ b/lib/srv/debug/handlers.go
@@ -17,6 +17,7 @@
package debug
import (
+ "context"
"fmt"
"io"
"log/slog"
@@ -24,6 +25,7 @@ import (
"net/http/pprof"
"slices"
"strings"
+ "time"
"github.com/gravitational/trace"
@@ -38,19 +40,34 @@ type LogLeveler interface {
SetLogLevel(slog.Level)
}
-// NewServeMux returns a http mux that handles all the debug service endpoints.
-func NewServeMux(logger *slog.Logger, leveler LogLeveler) *http.ServeMux {
- mux := http.NewServeMux()
+// RegisterProfilingHandlers registers the debug profiling handlers (/debug/pprof/*)
+// to a given multiplexer.
+func RegisterProfilingHandlers(mux *http.ServeMux, logger *slog.Logger) {
+ noWriteTimeout := func(h http.HandlerFunc) http.HandlerFunc {
+ return func(w http.ResponseWriter, r *http.Request) {
+ rc := http.NewResponseController(w) //nolint:bodyclose // bodyclose gets really confused about NewResponseController
+ if err := rc.SetWriteDeadline(time.Time{}); err == nil {
+ // don't let the pprof handlers know about the WriteTimeout
+ r = r.WithContext(context.WithValue(r.Context(), http.ServerContextKey, nil))
+ }
+ h(w, r)
+ }
+ }
+
mux.HandleFunc("/debug/pprof/cmdline", pprofMiddleware(logger, "cmdline", pprof.Cmdline))
- mux.HandleFunc("/debug/pprof/profile", pprofMiddleware(logger, "profile", pprof.Profile))
+ mux.HandleFunc("/debug/pprof/profile", pprofMiddleware(logger, "profile", noWriteTimeout(pprof.Profile)))
mux.HandleFunc("/debug/pprof/symbol", pprofMiddleware(logger, "symbol", pprof.Symbol))
- mux.HandleFunc("/debug/pprof/trace", pprofMiddleware(logger, "trace", pprof.Trace))
+ mux.HandleFunc("/debug/pprof/trace", pprofMiddleware(logger, "trace", noWriteTimeout(pprof.Trace)))
mux.HandleFunc("/debug/pprof/{profile}", func(w http.ResponseWriter, r *http.Request) {
- pprofMiddleware(logger, r.PathValue("profile"), pprof.Index)(w, r)
+ pprofMiddleware(logger, r.PathValue("profile"), noWriteTimeout(pprof.Index))(w, r)
})
+}
+
+// RegisterLogLevelHandlers registers log level handlers to a given multiplexer.
+// This allows to dynamically change the process' log level.
+func RegisterLogLevelHandlers(mux *http.ServeMux, logger *slog.Logger, leveler LogLeveler) {
mux.Handle("GET /log-level", handleGetLog(logger, leveler))
mux.Handle("PUT /log-level", handleSetLog(logger, leveler))
- return mux
}
// handleGetLog returns the http get log level handler.
diff --git a/lib/srv/debug/handlers_test.go b/lib/srv/debug/handlers_test.go
index 6c20b67f03de7..b9224fc9f3c1f 100644
--- a/lib/srv/debug/handlers_test.go
+++ b/lib/srv/debug/handlers_test.go
@@ -95,8 +95,15 @@ func TestCollectProfiles(t *testing.T) {
func makeServer(t *testing.T) (*mockLeveler, *httptest.Server) {
leveler := &mockLeveler{}
- ts := httptest.NewServer(NewServeMux(slog.New(logutils.NewSlogTextHandler(io.Discard, logutils.SlogTextHandlerConfig{})), leveler))
+ logger := slog.New(logutils.NewSlogTextHandler(io.Discard, logutils.SlogTextHandlerConfig{}))
+
+ mux := http.NewServeMux()
+ RegisterProfilingHandlers(mux, logger)
+ RegisterLogLevelHandlers(mux, logger, leveler)
+
+ ts := httptest.NewServer(mux)
t.Cleanup(func() { ts.Close() })
+
return leveler, ts
}