diff --git a/docs/pages/includes/machine-id/common-output-config.yaml b/docs/pages/includes/machine-id/common-output-config.yaml
index 37a2f0ec37b8e..df32295a9afcf 100644
--- a/docs/pages/includes/machine-id/common-output-config.yaml
+++ b/docs/pages/includes/machine-id/common-output-config.yaml
@@ -25,3 +25,8 @@ roles:
# on the next invocation, but don't want long-lived workload certificates on-disk.
credential_ttl: 30m
renewal_interval: 15m
+
+# name optionally overrides the name of the service used in logs and the `/readyz`
+# endpoint. It must only contain letters, numbers, hyphens, underscores, and plus
+# symbols.
+name: my-service-name
diff --git a/docs/pages/reference/machine-id/configuration.mdx b/docs/pages/reference/machine-id/configuration.mdx
index c6b18a8fafe7e..d5fc2310eac50 100644
--- a/docs/pages/reference/machine-id/configuration.mdx
+++ b/docs/pages/reference/machine-id/configuration.mdx
@@ -366,6 +366,11 @@ renewal_interval: 15m
# plugin is used to automatically refresh the credentials within a single
# invocation of `kubectl`. Defaults to `false`.
disable_exec_plugin: false
+
+# name optionally overrides the name of the service used in logs and the `/readyz`
+# endpoint. It must only contain letters, numbers, hyphens, underscores, and plus
+# symbols.
+name: my-service-name
```
Each Kubernetes cluster matching a selector will result in a new context in the
@@ -456,6 +461,10 @@ audiences:
- foo.example.com
(!docs/pages/includes/machine-id/workload-identity-selector-config.yaml!)
(!docs/pages/includes/machine-id/common-output-config.yaml!)
+# name optionally overrides the name of the service used in logs and the `/readyz`
+# endpoint. It must only contain letters, numbers, hyphens, underscores, and plus
+# symbols.
+name: my-service-name
```
### `workload-identity-aws-roles-anywhere`
@@ -523,6 +532,10 @@ artifact_name: my-credentials-file
# defaults to `false`.
overwrite_credential_file: false
(!docs/pages/includes/machine-id/workload-identity-selector-config.yaml!)
+# name optionally overrides the name of the service used in logs and the `/readyz`
+# endpoint. It must only contain letters, numbers, hyphens, underscores, and plus
+# symbols.
+name: my-service-name
```
### `spiffe-svid`
@@ -780,6 +793,10 @@ svids:
#
# If unspecified, the GID is not checked.
gid: 50
+# name optionally overrides the name of the service used in logs and the `/readyz`
+# endpoint. It must only contain letters, numbers, hyphens, underscores, and plus
+# symbols.
+name: my-service-name
```
#### Envoy SDS
@@ -947,6 +964,10 @@ database: postgres
# username is the name of the user on the specified database server to open a
# tunnel for.
username: postgres
+# name optionally overrides the name of the service used in logs and the `/readyz`
+# endpoint. It must only contain letters, numbers, hyphens, underscores, and plus
+# symbols.
+name: my-service-name
```
The `database-tunnel` service will not start if `tbot` has been configured
@@ -978,6 +999,10 @@ listen: tcp://127.0.0.1:8084
# app_name is the name of the application, as configured in Teleport, that
# the service should open a tunnel to.
app_name: my-application
+# name optionally overrides the name of the service used in logs and the `/readyz`
+# endpoint. It must only contain letters, numbers, hyphens, underscores, and plus
+# symbols.
+name: my-service-name
```
The `application-tunnel` service will not start if `tbot` has been configured
@@ -1036,6 +1061,10 @@ proxy_command:
#
# If unspecified, proxy templates will not be used.
proxy_templates_path: /etc/my-proxy-templates.yaml
+# name optionally overrides the name of the service used in logs and the `/readyz`
+# endpoint. It must only contain letters, numbers, hyphens, underscores, and plus
+# symbols.
+name: my-service-name
```
Once configured, `tbot` will create the following artifacts in the specified
diff --git a/docs/pages/reference/machine-id/diagnostics-service.mdx b/docs/pages/reference/machine-id/diagnostics-service.mdx
index 23859771f51d8..45e3440b2483f 100644
--- a/docs/pages/reference/machine-id/diagnostics-service.mdx
+++ b/docs/pages/reference/machine-id/diagnostics-service.mdx
@@ -40,13 +40,65 @@ to determine if the `tbot` process is running and has not crashed or hung.
If deploying to Kubernetes, we recommend this endpoint is used for your
Liveness Probe.
-### `/readyz`
+### `/readyz` and `/readyz/{service}`
-The `/readyz` endpoint currently returns the same information as `/livez`.
+The `/readyz` endpoint returns the overall health of `tbot`, including all of
+its internal and user-defined services. If all services are healthy, it will
+respond with a 200 status code. If any service is unhealthy, it will respond
+with a 503 status code.
-In the future, this endpoint will be expanded to indicate whether the internal
-components of `tbot` have been able to generate certificates and are ready
-to serve requests.
+```code
+$ curl -v http://127.0.0.1:3001/readyz
+
+HTTP/1.1 503 Service Unavailable
+Content-Type: application/json
+
+{
+ "status": "unhealthy",
+ "services": {
+ "ca-rotation": {
+ "status": "healthy"
+ },
+ "heartbeat": {
+ "status": "healthy"
+ },
+ "identity": {
+ "status": "healthy"
+ },
+ "aws-roles-anywhere": {
+ "status": "unhealthy",
+ "reason": "access denied to perform action \"read\" on \"workload_identity\""
+ }
+ }
+}
+```
+
+If deploying to Kubernetes, we recommend this endpoint is used for your
+Readiness Probe.
+
+You can also use the `/readyz/{service}` endpoint to query the health of a
+specific service.
+
+```code
+$ curl -v http://127.0.0.1:3001/readyz/aws-roles-anywhere
+
+HTTP/1.1 200 OK
+Content-Type: application/json
+
+{
+ "status": "healthy"
+}
+```
+
+By default, `tbot` generates service names based on their configuration such as
+the output destination. You can override this by providing your own name in the
+`tbot` configuration file.
+
+```yaml
+services:
+ - type: identity
+ name: my-service-123
+```
### `/metrics`
diff --git a/lib/tbot/config/config.go b/lib/tbot/config/config.go
index a9025abb61c4e..eeabf11e32ac9 100644
--- a/lib/tbot/config/config.go
+++ b/lib/tbot/config/config.go
@@ -26,6 +26,7 @@ import (
"fmt"
"io"
"net/url"
+ "regexp"
"slices"
"strings"
"time"
@@ -65,6 +66,38 @@ var SupportedJoinMethods = []string{
string(types.JoinMethodTerraformCloud),
}
+// ReservedServiceNames are the service names reserved for internal use.
+var ReservedServiceNames = []string{
+ "ca-rotation",
+ "crl-cache",
+ "heartbeat",
+ "identity",
+ "spiffe-trust-bundle-cache",
+}
+
+var reservedServiceNamesMap = func() map[string]struct{} {
+ m := make(map[string]struct{}, len(ReservedServiceNames))
+ for _, k := range ReservedServiceNames {
+ m[k] = struct{}{}
+ }
+ return m
+}()
+
+var serviceNameRegex = regexp.MustCompile(`\A[a-z\d_\-+]+\z`)
+
+func validateServiceName(name string) error {
+ if name == "" {
+ return nil
+ }
+ if _, ok := reservedServiceNamesMap[name]; ok {
+ return trace.BadParameter("service name %q is reserved for internal use", name)
+ }
+ if !serviceNameRegex.MatchString(name) {
+ return trace.BadParameter("invalid service name: %q, may only contain lowercase letters, numbers, hyphens, underscores, or plus symbols", name)
+ }
+ return nil
+}
+
var log = logutils.NewPackageLogger(teleport.ComponentKey, teleport.ComponentTBot)
// AzureOnboardingConfig holds configuration relevant to the "azure" join method.
@@ -288,6 +321,7 @@ func (conf *BotConfig) CheckAndSetDefaults() error {
// We've migrated Outputs to Services, so copy all Outputs to Services.
conf.Services = append(conf.Services, conf.Outputs...)
+ uniqueNames := make(map[string]struct{}, len(conf.Services))
for i, service := range conf.Services {
if err := service.CheckAndSetDefaults(); err != nil {
return trace.Wrap(err, "validating service[%d]", i)
@@ -295,6 +329,15 @@ func (conf *BotConfig) CheckAndSetDefaults() error {
if err := service.GetCredentialLifetime().Validate(conf.Oneshot); err != nil {
return trace.Wrap(err, "validating service[%d]", i)
}
+ if name := service.GetName(); name != "" {
+ if err := validateServiceName(name); err != nil {
+ return trace.Wrap(err, "validating service[%d]", i)
+ }
+ if _, seen := uniqueNames[name]; seen {
+ return trace.BadParameter("validating service[%d]: duplicate name: %q", i, name)
+ }
+ uniqueNames[name] = struct{}{}
+ }
}
destinationPaths := map[string]int{}
@@ -386,6 +429,10 @@ type ServiceConfig interface {
// RenewalInterval. It's used for validation purposes; services that do not
// support these options should return the zero value.
GetCredentialLifetime() CredentialLifetime
+
+ // GetName returns the user-given name of the service, used for validation
+ // purposes.
+ GetName() string
}
// ServiceConfigs assists polymorphic unmarshaling of a slice of ServiceConfigs.
diff --git a/lib/tbot/config/config_test.go b/lib/tbot/config/config_test.go
index 5d4eff9af406a..21ab98e1f1db7 100644
--- a/lib/tbot/config/config_test.go
+++ b/lib/tbot/config/config_test.go
@@ -578,3 +578,59 @@ func TestBotConfig_Base64(t *testing.T) {
})
}
}
+
+func TestBotConfig_NameValidation(t *testing.T) {
+ t.Parallel()
+
+ testCases := map[string]struct {
+ cfg *BotConfig
+ err string
+ }{
+ "duplicate names": {
+ cfg: &BotConfig{
+ Version: V2,
+ Services: ServiceConfigs{
+ &IdentityOutput{
+ Name: "foo",
+ Destination: &DestinationMemory{},
+ },
+ &IdentityOutput{
+ Name: "foo",
+ Destination: &DestinationMemory{},
+ },
+ },
+ },
+ err: `duplicate name: "foo`,
+ },
+ "reserved name": {
+ cfg: &BotConfig{
+ Version: V2,
+ Services: ServiceConfigs{
+ &IdentityOutput{
+ Name: "identity",
+ Destination: &DestinationMemory{},
+ },
+ },
+ },
+ err: `service name "identity" is reserved for internal use`,
+ },
+ "invalid name": {
+ cfg: &BotConfig{
+ Version: V2,
+ Services: ServiceConfigs{
+ &IdentityOutput{
+ Name: "hello, world!",
+ Destination: &DestinationMemory{},
+ },
+ },
+ },
+ err: `may only contain lowercase letters`,
+ },
+ }
+ for desc, tc := range testCases {
+ t.Run(desc, func(t *testing.T) {
+ t.Parallel()
+ require.ErrorContains(t, tc.cfg.CheckAndSetDefaults(), tc.err)
+ })
+ }
+}
diff --git a/lib/tbot/config/service_application.go b/lib/tbot/config/service_application.go
index cd4148d702910..3912f10994cf3 100644
--- a/lib/tbot/config/service_application.go
+++ b/lib/tbot/config/service_application.go
@@ -35,6 +35,8 @@ var (
const ApplicationOutputType = "application"
type ApplicationOutput struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Destination is where the credentials should be written to.
Destination bot.Destination `yaml:"destination"`
// Roles is the list of roles to request for the generated credentials.
@@ -68,6 +70,11 @@ func (o *ApplicationOutput) CheckAndSetDefaults() error {
return nil
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *ApplicationOutput) GetName() string {
+ return o.Name
+}
+
func (o *ApplicationOutput) GetDestination() bot.Destination {
return o.Destination
}
diff --git a/lib/tbot/config/service_application_tunnel.go b/lib/tbot/config/service_application_tunnel.go
index ede5b1a4ac44f..d3da1300053ac 100644
--- a/lib/tbot/config/service_application_tunnel.go
+++ b/lib/tbot/config/service_application_tunnel.go
@@ -35,6 +35,8 @@ const ApplicationTunnelServiceType = "application-tunnel"
// ApplicationTunnelService opens an authenticated tunnel for Application
// Access.
type ApplicationTunnelService struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Listen is the address on which database tunnel should listen. Example:
// - "tcp://127.0.0.1:3306"
// - "tcp://0.0.0.0:3306
@@ -59,7 +61,12 @@ func (s *ApplicationTunnelService) Type() string {
return ApplicationTunnelServiceType
}
-func (s *ApplicationTunnelService) MarshalYAML() (interface{}, error) {
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *ApplicationTunnelService) GetName() string {
+ return o.Name
+}
+
+func (s *ApplicationTunnelService) MarshalYAML() (any, error) {
type raw ApplicationTunnelService
return withTypeHeader((*raw)(s), ApplicationTunnelServiceType)
}
diff --git a/lib/tbot/config/service_client_credential.go b/lib/tbot/config/service_client_credential.go
index f685aa7100c99..12ffb9835b4a3 100644
--- a/lib/tbot/config/service_client_credential.go
+++ b/lib/tbot/config/service_client_credential.go
@@ -47,11 +47,19 @@ var (
// be modified. This output is currently part of an experiment and could be
// removed in a future release.
type UnstableClientCredentialOutput struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
+
mu sync.Mutex
facade *identity.Facade
ready chan struct{}
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *UnstableClientCredentialOutput) GetName() string {
+ return o.Name
+}
+
// Ready returns a channel which closes when the Output is ready to be used
// as a client credential. Using this as a credential before Ready closes is
// unsupported.
diff --git a/lib/tbot/config/service_database.go b/lib/tbot/config/service_database.go
index 205e5df49f3c7..e7e725cc11ed7 100644
--- a/lib/tbot/config/service_database.go
+++ b/lib/tbot/config/service_database.go
@@ -76,6 +76,8 @@ var (
// DatabaseOutput produces credentials which can be used to connect to a
// database through teleport.
type DatabaseOutput struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Destination is where the credentials should be written to.
Destination bot.Destination `yaml:"destination"`
// Roles is the list of roles to request for the generated credentials.
@@ -101,6 +103,11 @@ type DatabaseOutput struct {
CredentialLifetime CredentialLifetime `yaml:",inline"`
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *DatabaseOutput) GetName() string {
+ return o.Name
+}
+
func (o *DatabaseOutput) Init(ctx context.Context) error {
subDirs := []string{}
if o.Format == CockroachDatabaseFormat {
diff --git a/lib/tbot/config/service_database_tunnel.go b/lib/tbot/config/service_database_tunnel.go
index 0e68563980a8e..9b03e5945f124 100644
--- a/lib/tbot/config/service_database_tunnel.go
+++ b/lib/tbot/config/service_database_tunnel.go
@@ -30,6 +30,8 @@ const DatabaseTunnelServiceType = "database-tunnel"
// DatabaseTunnelService opens an authenticated tunnel for Database Access.
type DatabaseTunnelService struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Listen is the address on which database tunnel should listen. Example:
// - "tcp://127.0.0.1:3306"
// - "tcp://0.0.0.0:3306
@@ -55,6 +57,11 @@ type DatabaseTunnelService struct {
Listener net.Listener `yaml:"-"`
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *DatabaseTunnelService) GetName() string {
+ return o.Name
+}
+
func (s *DatabaseTunnelService) Type() string {
return DatabaseTunnelServiceType
}
diff --git a/lib/tbot/config/service_example.go b/lib/tbot/config/service_example.go
index 7a09dd38b9cbd..7eabf9eb3bcd2 100644
--- a/lib/tbot/config/service_example.go
+++ b/lib/tbot/config/service_example.go
@@ -29,6 +29,9 @@ const ExampleServiceType = "example"
// not intended to be used and exists to demonstrate how a user configurable
// service integrates with the tbot service manager.
type ExampleService struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
+
Message string `yaml:"message"`
}
@@ -36,7 +39,12 @@ func (s *ExampleService) Type() string {
return ExampleServiceType
}
-func (s *ExampleService) MarshalYAML() (interface{}, error) {
+// GetName returns the user-given name of the service, used for validation purposes.
+func (s *ExampleService) GetName() string {
+ return s.Name
+}
+
+func (s *ExampleService) MarshalYAML() (any, error) {
type raw ExampleService
return withTypeHeader((*raw)(s), ExampleServiceType)
}
diff --git a/lib/tbot/config/service_identity.go b/lib/tbot/config/service_identity.go
index 8d51fd43564db..0e0827e8fdcf5 100644
--- a/lib/tbot/config/service_identity.go
+++ b/lib/tbot/config/service_identity.go
@@ -75,6 +75,8 @@ var (
// It cannot be used to connect to Applications, Databases or Kubernetes
// Clusters.
type IdentityOutput struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Destination is where the credentials should be written to.
Destination bot.Destination `yaml:"destination"`
// Roles is the list of roles to request for the generated credentials.
@@ -107,6 +109,11 @@ type IdentityOutput struct {
CredentialLifetime CredentialLifetime `yaml:",inline"`
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *IdentityOutput) GetName() string {
+ return o.Name
+}
+
func (o *IdentityOutput) Init(ctx context.Context) error {
return trace.Wrap(o.Destination.Init(ctx, []string{}))
}
diff --git a/lib/tbot/config/service_kubernetes.go b/lib/tbot/config/service_kubernetes.go
index 09a7572bba49a..c15d5889f1cfc 100644
--- a/lib/tbot/config/service_kubernetes.go
+++ b/lib/tbot/config/service_kubernetes.go
@@ -37,6 +37,8 @@ const KubernetesOutputType = "kubernetes"
// KubernetesOutput produces credentials which can be used to connect to a
// Kubernetes Cluster through teleport.
type KubernetesOutput struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Destination is where the credentials should be written to.
Destination bot.Destination `yaml:"destination"`
// Roles is the list of roles to request for the generated credentials.
@@ -60,6 +62,11 @@ type KubernetesOutput struct {
CredentialLifetime CredentialLifetime `yaml:",inline"`
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *KubernetesOutput) GetName() string {
+ return o.Name
+}
+
func (o *KubernetesOutput) CheckAndSetDefaults() error {
if err := validateOutputDestination(o.Destination); err != nil {
return trace.Wrap(err)
diff --git a/lib/tbot/config/service_kubernetes_v2.go b/lib/tbot/config/service_kubernetes_v2.go
index eb0b651dd6ce2..5fd27b8a10479 100644
--- a/lib/tbot/config/service_kubernetes_v2.go
+++ b/lib/tbot/config/service_kubernetes_v2.go
@@ -37,6 +37,8 @@ const KubernetesV2OutputType = "kubernetes/v2"
// KubernetesOutput produces credentials which can be used to connect to a
// Kubernetes Cluster through teleport.
type KubernetesV2Output struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Destination is where the credentials should be written to.
Destination bot.Destination `yaml:"destination"`
@@ -56,6 +58,11 @@ type KubernetesV2Output struct {
CredentialLifetime CredentialLifetime `yaml:",inline"`
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *KubernetesV2Output) GetName() string {
+ return o.Name
+}
+
func (o *KubernetesV2Output) CheckAndSetDefaults() error {
if err := validateOutputDestination(o.Destination); err != nil {
return trace.Wrap(err)
diff --git a/lib/tbot/config/service_spiffe_svid.go b/lib/tbot/config/service_spiffe_svid.go
index 5e208a35074e5..89cfec1d242c3 100644
--- a/lib/tbot/config/service_spiffe_svid.go
+++ b/lib/tbot/config/service_spiffe_svid.go
@@ -117,6 +117,8 @@ func (o JWTSVID) CheckAndSetDefaults() error {
// SPIFFESVIDOutput is the configuration for the SPIFFE SVID output.
// Emulates the output of https://github.com/spiffe/spiffe-helper
type SPIFFESVIDOutput struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Destination is where the credentials should be written to.
Destination bot.Destination `yaml:"destination"`
SVID SVIDRequest `yaml:"svid"`
@@ -130,6 +132,11 @@ type SPIFFESVIDOutput struct {
CredentialLifetime CredentialLifetime `yaml:",inline"`
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *SPIFFESVIDOutput) GetName() string {
+ return o.Name
+}
+
// Init initializes the destination.
func (o *SPIFFESVIDOutput) Init(ctx context.Context) error {
return trace.Wrap(o.Destination.Init(ctx, []string{}))
diff --git a/lib/tbot/config/service_spiffe_workload_api.go b/lib/tbot/config/service_spiffe_workload_api.go
index c5e0917bf0666..a1c2e78398641 100644
--- a/lib/tbot/config/service_spiffe_workload_api.go
+++ b/lib/tbot/config/service_spiffe_workload_api.go
@@ -114,6 +114,8 @@ func (o SVIDRequestRule) LogValue() slog.Value {
// SPIFFEWorkloadAPIService is the configuration for the SPIFFE Workload API
// service.
type SPIFFEWorkloadAPIService struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Listen is the address on which the SPIFFE Workload API server should
// listen. This should either be prefixed with "unix://" or "tcp://".
Listen string `yaml:"listen"`
@@ -132,6 +134,11 @@ type SPIFFEWorkloadAPIService struct {
CredentialLifetime CredentialLifetime `yaml:",inline"`
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *SPIFFEWorkloadAPIService) GetName() string {
+ return o.Name
+}
+
func (s *SPIFFEWorkloadAPIService) Type() string {
return SPIFFEWorkloadAPIServiceType
}
diff --git a/lib/tbot/config/service_ssh_host.go b/lib/tbot/config/service_ssh_host.go
index 7729cfc0a8c06..16a36b4a0448b 100644
--- a/lib/tbot/config/service_ssh_host.go
+++ b/lib/tbot/config/service_ssh_host.go
@@ -49,6 +49,8 @@ var (
// SSHHostOutput generates a host certificate signed by the Teleport CA. This
// can be used to allow OpenSSH server to be trusted by Teleport SSH clients.
type SSHHostOutput struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Destination is where the credentials should be written to.
Destination bot.Destination `yaml:"destination"`
// Roles is the list of roles to request for the generated credentials.
@@ -63,6 +65,11 @@ type SSHHostOutput struct {
CredentialLifetime CredentialLifetime `yaml:",inline"`
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *SSHHostOutput) GetName() string {
+ return o.Name
+}
+
func (o *SSHHostOutput) Init(ctx context.Context) error {
return trace.Wrap(o.Destination.Init(ctx, []string{}))
}
diff --git a/lib/tbot/config/service_ssh_multiplexer.go b/lib/tbot/config/service_ssh_multiplexer.go
index 3d538647518cc..6d98324f2b74a 100644
--- a/lib/tbot/config/service_ssh_multiplexer.go
+++ b/lib/tbot/config/service_ssh_multiplexer.go
@@ -30,6 +30,8 @@ const SSHMultiplexerServiceType = "ssh-multiplexer"
// SSHMultiplexerService is the configuration for the `ssh-proxy` service
type SSHMultiplexerService struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Destination is where the config and tunnel should be written to. It
// should be a DestinationDirectory.
Destination bot.Destination `yaml:"destination"`
@@ -54,6 +56,11 @@ type SSHMultiplexerService struct {
CredentialLifetime CredentialLifetime `yaml:",inline"`
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *SSHMultiplexerService) GetName() string {
+ return o.Name
+}
+
func (s *SSHMultiplexerService) SessionResumptionEnabled() bool {
if s.EnableResumption == nil {
return true
diff --git a/lib/tbot/config/service_workload_identity_api.go b/lib/tbot/config/service_workload_identity_api.go
index dd0890422e227..1d6b1e6b795e4 100644
--- a/lib/tbot/config/service_workload_identity_api.go
+++ b/lib/tbot/config/service_workload_identity_api.go
@@ -32,6 +32,8 @@ var (
// WorkloadIdentityAPIService is the configuration for the
// WorkloadIdentityAPIService
type WorkloadIdentityAPIService struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Listen is the address on which the SPIFFE Workload API server should
// listen. This should either be prefixed with "unix://" or "tcp://".
Listen string `yaml:"listen"`
@@ -60,6 +62,11 @@ func (o *WorkloadIdentityAPIService) CheckAndSetDefaults() error {
return nil
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *WorkloadIdentityAPIService) GetName() string {
+ return o.Name
+}
+
// Type returns the type of the service.
func (o *WorkloadIdentityAPIService) Type() string {
return WorkloadIdentityAPIServiceType
diff --git a/lib/tbot/config/service_workload_identity_aws_ra.go b/lib/tbot/config/service_workload_identity_aws_ra.go
index aefc1ee00b6b7..6fbc269f28fea 100644
--- a/lib/tbot/config/service_workload_identity_aws_ra.go
+++ b/lib/tbot/config/service_workload_identity_aws_ra.go
@@ -43,6 +43,8 @@ var (
// WorkloadIdentityAWSRAService is the configuration for the
// WorkloadIdentityAWSRAService
type WorkloadIdentityAWSRAService struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Selector is the selector for the WorkloadIdentity resource that will be
// used to issue WICs.
Selector WorkloadIdentitySelector `yaml:"selector"`
@@ -96,6 +98,11 @@ type WorkloadIdentityAWSRAService struct {
EndpointOverride string `yaml:"-"`
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o *WorkloadIdentityAWSRAService) GetName() string {
+ return o.Name
+}
+
// Init initializes the destination.
func (o *WorkloadIdentityAWSRAService) Init(ctx context.Context) error {
return trace.Wrap(o.Destination.Init(ctx, []string{}))
diff --git a/lib/tbot/config/service_workload_identity_jwt.go b/lib/tbot/config/service_workload_identity_jwt.go
index a46063380d363..2c95a36488a90 100644
--- a/lib/tbot/config/service_workload_identity_jwt.go
+++ b/lib/tbot/config/service_workload_identity_jwt.go
@@ -34,6 +34,8 @@ var (
// WorkloadIdentityJWTService is the configuration for the WorkloadIdentityJWTService
type WorkloadIdentityJWTService struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Selector is the selector for the WorkloadIdentity resource that will be
// used to issue WICs.
Selector WorkloadIdentitySelector `yaml:"selector"`
@@ -47,6 +49,11 @@ type WorkloadIdentityJWTService struct {
CredentialLifetime CredentialLifetime `yaml:",inline"`
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o WorkloadIdentityJWTService) GetName() string {
+ return o.Name
+}
+
// Init initializes the destination.
func (o *WorkloadIdentityJWTService) Init(ctx context.Context) error {
return trace.Wrap(o.Destination.Init(ctx, []string{}))
diff --git a/lib/tbot/config/service_workload_identity_x509.go b/lib/tbot/config/service_workload_identity_x509.go
index 80fd9d9a2885a..0a2469cd09089 100644
--- a/lib/tbot/config/service_workload_identity_x509.go
+++ b/lib/tbot/config/service_workload_identity_x509.go
@@ -63,6 +63,8 @@ func (s *WorkloadIdentitySelector) CheckAndSetDefaults() error {
// WorkloadIdentityX509Service is the configuration for the WorkloadIdentityX509Service
// Emulates the output of https://github.com/spiffe/spiffe-helper
type WorkloadIdentityX509Service struct {
+ // Name of the service for logs and the /readyz endpoint.
+ Name string `yaml:"name,omitempty"`
// Selector is the selector for the WorkloadIdentity resource that will be
// used to issue WICs.
Selector WorkloadIdentitySelector `yaml:"selector"`
@@ -77,6 +79,11 @@ type WorkloadIdentityX509Service struct {
CredentialLifetime CredentialLifetime `yaml:",inline"`
}
+// GetName returns the user-given name of the service, used for validation purposes.
+func (o WorkloadIdentityX509Service) GetName() string {
+ return o.Name
+}
+
// Init initializes the destination.
func (o *WorkloadIdentityX509Service) Init(ctx context.Context) error {
return trace.Wrap(o.Destination.Init(ctx, []string{}))
diff --git a/lib/tbot/loop.go b/lib/tbot/loop.go
index af9cef8940a0e..c941f27482cfd 100644
--- a/lib/tbot/loop.go
+++ b/lib/tbot/loop.go
@@ -29,6 +29,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/gravitational/teleport/api/utils/retryutils"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
)
var (
@@ -80,13 +81,10 @@ type runOnIntervalConfig struct {
// service doesn't support gracefully degrading when there is no API client
// available.
identityReadyCh <-chan struct{}
+ statusReporter readyz.Reporter
}
-// runOnInterval runs a function on a given interval, with retries and jitter.
-//
-// TODO(noah): Emit Prometheus metrics for:
-// - Time of next attempt
-func runOnInterval(ctx context.Context, cfg runOnIntervalConfig) error {
+func (cfg *runOnIntervalConfig) checkAndSetDefaults() error {
switch {
case cfg.interval <= 0:
return trace.BadParameter("interval must be greater than 0")
@@ -100,6 +98,25 @@ func runOnInterval(ctx context.Context, cfg runOnIntervalConfig) error {
return trace.BadParameter("name is required")
}
+ if cfg.clock == nil {
+ cfg.clock = clockwork.NewRealClock()
+ }
+ if cfg.statusReporter == nil {
+ cfg.statusReporter = readyz.NoopReporter()
+ }
+
+ return nil
+}
+
+// runOnInterval runs a function on a given interval, with retries and jitter.
+//
+// TODO(noah): Emit Prometheus metrics for:
+// - Time of next attempt
+func runOnInterval(ctx context.Context, cfg runOnIntervalConfig) error {
+ if err := cfg.checkAndSetDefaults(); err != nil {
+ return err
+ }
+
log := cfg.log.With("task", cfg.name)
if cfg.identityReadyCh != nil {
@@ -115,10 +132,6 @@ func runOnInterval(ctx context.Context, cfg runOnIntervalConfig) error {
}
}
- if cfg.clock == nil {
- cfg.clock = clockwork.NewRealClock()
- }
-
ticker := cfg.clock.NewTicker(cfg.interval)
defer ticker.Stop()
jitter := retryutils.DefaultJitter
@@ -147,6 +160,7 @@ func runOnInterval(ctx context.Context, cfg runOnIntervalConfig) error {
)
err = cfg.f(ctx)
if err == nil {
+ cfg.statusReporter.Report(readyz.Healthy)
loopIterationsSuccessCounter.WithLabelValues(cfg.service, cfg.name).Observe(float64(attempt - 1))
break
}
@@ -177,6 +191,7 @@ func runOnInterval(ctx context.Context, cfg runOnIntervalConfig) error {
loopIterationTime.WithLabelValues(cfg.service, cfg.name).Observe(time.Since(startTime).Seconds())
if err != nil {
+ cfg.statusReporter.ReportReason(readyz.Unhealthy, err.Error())
loopIterationsFailureCounter.WithLabelValues(cfg.service, cfg.name).Inc()
if cfg.exitOnRetryExhausted {
diff --git a/lib/tbot/readyz/http.go b/lib/tbot/readyz/http.go
new file mode 100644
index 0000000000000..f2a9472beec59
--- /dev/null
+++ b/lib/tbot/readyz/http.go
@@ -0,0 +1,79 @@
+/*
+ * Teleport
+ * Copyright (C) 2025 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+package readyz
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "log/slog"
+ "net/http"
+)
+
+// HTTPHandler returns an HTTP handler that implements tbot's
+// /readyz(/{service}) endpoints.
+func HTTPHandler(reg *Registry) http.Handler {
+ mux := http.NewServeMux()
+
+ mux.Handle("/readyz/{service}", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/json")
+
+ status, ok := reg.ServiceStatus(r.PathValue("service"))
+ if !ok {
+ w.WriteHeader(http.StatusNotFound)
+ if err := writeJSON(w, struct {
+ Error string `json:"error"`
+ }{
+ fmt.Sprintf("Service named %q not found.", r.PathValue("service")),
+ }); err != nil {
+ slog.ErrorContext(r.Context(), "Failed to write response", "error", err)
+ }
+ return
+ }
+
+ w.WriteHeader(status.Status.HTTPStatusCode())
+ if err := writeJSON(w, status); err != nil {
+ slog.ErrorContext(r.Context(), "Failed to write response", "error", err)
+ }
+ }))
+
+ mux.Handle("/readyz", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ status := reg.OverallStatus()
+
+ w.Header().Set("Content-Type", "application/json")
+ w.WriteHeader(status.Status.HTTPStatusCode())
+
+ if err := writeJSON(w, status); err != nil {
+ slog.ErrorContext(r.Context(), "Failed to write response", "error", err)
+ }
+ }))
+
+ return mux
+}
+
+func writeJSON(w io.Writer, v any) error {
+ output, err := json.MarshalIndent(v, "", " ")
+ if err != nil {
+ return err
+ }
+ if _, err := w.Write(output); err != nil {
+ return err
+ }
+ return nil
+}
diff --git a/lib/tbot/readyz/readyz.go b/lib/tbot/readyz/readyz.go
new file mode 100644
index 0000000000000..4462c7c1fe4bc
--- /dev/null
+++ b/lib/tbot/readyz/readyz.go
@@ -0,0 +1,113 @@
+/*
+ * Teleport
+ * Copyright (C) 2025 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+package readyz
+
+import "sync"
+
+// NewRegistry returns a Registry to track the health of tbot's services.
+func NewRegistry() *Registry {
+ return &Registry{
+ services: make(map[string]*ServiceStatus),
+ }
+}
+
+// Registry tracks the status/health of tbot's services.
+type Registry struct {
+ mu sync.Mutex
+ services map[string]*ServiceStatus
+}
+
+// AddService adds a service to the registry so that its health will be reported
+// from our readyz endpoints. It returns a Reporter the service can use to report
+// status changes.
+func (r *Registry) AddService(name string) Reporter {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+
+ status, ok := r.services[name]
+ if !ok {
+ status = &ServiceStatus{}
+ r.services[name] = status
+ }
+ return &reporter{
+ mu: &r.mu,
+ status: status,
+ }
+}
+
+// ServiceStatus reads the named service's status. The bool value will be false
+// if the service has not been registered.
+func (r *Registry) ServiceStatus(name string) (*ServiceStatus, bool) {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+
+ if status, ok := r.services[name]; ok {
+ return status.Clone(), true
+ }
+
+ return nil, false
+}
+
+// OverallStatus returns tbot's overall status when taking service statuses into
+// account.
+func (r *Registry) OverallStatus() *OverallStatus {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+
+ status := Healthy
+ services := make(map[string]*ServiceStatus, len(r.services))
+
+ for name, svc := range r.services {
+ services[name] = svc.Clone()
+
+ if svc.Status != Healthy {
+ status = Unhealthy
+ }
+ }
+
+ return &OverallStatus{
+ Status: status,
+ Services: services,
+ }
+}
+
+// ServiceStatus is a snapshot of the service's status.
+type ServiceStatus struct {
+ // Status of the service.
+ Status Status `json:"status"`
+
+ // Reason string describing why the service has its current status.
+ Reason string `json:"reason,omitempty"`
+}
+
+// Clone the status to avoid data races.
+func (s *ServiceStatus) Clone() *ServiceStatus {
+ clone := *s
+ return &clone
+}
+
+// OverallStatus is tbot's overall aggregate status.
+type OverallStatus struct {
+ // Status of tbot overall. If any service isn't Healthy, the overall status
+ // will be Unhealthy.
+ Status Status `json:"status"`
+
+ // Services contains the service-specific statuses.
+ Services map[string]*ServiceStatus `json:"services"`
+}
diff --git a/lib/tbot/readyz/readyz_test.go b/lib/tbot/readyz/readyz_test.go
new file mode 100644
index 0000000000000..40641ab9d4a01
--- /dev/null
+++ b/lib/tbot/readyz/readyz_test.go
@@ -0,0 +1,148 @@
+/*
+ * Teleport
+ * Copyright (C) 2025 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+package readyz_test
+
+import (
+ "encoding/json"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+
+ "github.com/gravitational/teleport/lib/tbot/readyz"
+)
+
+func TestReadyz(t *testing.T) {
+ t.Parallel()
+
+ reg := readyz.NewRegistry()
+
+ a := reg.AddService("a")
+ b := reg.AddService("b")
+
+ srv := httptest.NewServer(readyz.HTTPHandler(reg))
+ srv.URL = srv.URL + "/readyz"
+ t.Cleanup(srv.Close)
+
+ t.Run("initial state - overall", func(t *testing.T) {
+ rsp, err := http.Get(srv.URL)
+ require.NoError(t, err)
+ defer rsp.Body.Close()
+
+ require.Equal(t, http.StatusServiceUnavailable, rsp.StatusCode)
+
+ var response readyz.OverallStatus
+ err = json.NewDecoder(rsp.Body).Decode(&response)
+ require.NoError(t, err)
+
+ require.Equal(t,
+ readyz.OverallStatus{
+ Status: readyz.Unhealthy,
+ Services: map[string]*readyz.ServiceStatus{
+ "a": {Status: readyz.Initializing},
+ "b": {Status: readyz.Initializing},
+ },
+ },
+ response,
+ )
+ })
+
+ t.Run("individual service", func(t *testing.T) {
+ a.ReportReason(readyz.Unhealthy, "database is down")
+
+ rsp, err := http.Get(srv.URL + "/a")
+ require.NoError(t, err)
+ defer rsp.Body.Close()
+
+ require.Equal(t, http.StatusServiceUnavailable, rsp.StatusCode)
+
+ var response readyz.ServiceStatus
+ err = json.NewDecoder(rsp.Body).Decode(&response)
+ require.NoError(t, err)
+
+ require.Equal(t,
+ readyz.ServiceStatus{
+ Status: readyz.Unhealthy,
+ Reason: "database is down",
+ },
+ response,
+ )
+ })
+
+ t.Run("mixed state", func(t *testing.T) {
+ a.Report(readyz.Healthy)
+ b.ReportReason(readyz.Unhealthy, "database is down")
+
+ rsp, err := http.Get(srv.URL)
+ require.NoError(t, err)
+ defer rsp.Body.Close()
+
+ require.Equal(t, http.StatusServiceUnavailable, rsp.StatusCode)
+
+ var response readyz.OverallStatus
+ err = json.NewDecoder(rsp.Body).Decode(&response)
+ require.NoError(t, err)
+
+ require.Equal(t,
+ readyz.OverallStatus{
+ Status: readyz.Unhealthy,
+ Services: map[string]*readyz.ServiceStatus{
+ "a": {Status: readyz.Healthy},
+ "b": {Status: readyz.Unhealthy, Reason: "database is down"},
+ },
+ },
+ response,
+ )
+ })
+
+ t.Run("all healthy", func(t *testing.T) {
+ a.Report(readyz.Healthy)
+ b.Report(readyz.Healthy)
+
+ rsp, err := http.Get(srv.URL)
+ require.NoError(t, err)
+ defer rsp.Body.Close()
+
+ require.Equal(t, http.StatusOK, rsp.StatusCode)
+
+ var response readyz.OverallStatus
+ err = json.NewDecoder(rsp.Body).Decode(&response)
+ require.NoError(t, err)
+
+ require.Equal(t,
+ readyz.OverallStatus{
+ Status: readyz.Healthy,
+ Services: map[string]*readyz.ServiceStatus{
+ "a": {Status: readyz.Healthy},
+ "b": {Status: readyz.Healthy},
+ },
+ },
+ response,
+ )
+ })
+
+ t.Run("unknown service", func(t *testing.T) {
+ rsp, err := http.Get(srv.URL + "/foo")
+ require.NoError(t, err)
+ defer rsp.Body.Close()
+
+ require.Equal(t, http.StatusNotFound, rsp.StatusCode)
+ })
+}
diff --git a/lib/tbot/readyz/reporter.go b/lib/tbot/readyz/reporter.go
new file mode 100644
index 0000000000000..3620bb96ef3fb
--- /dev/null
+++ b/lib/tbot/readyz/reporter.go
@@ -0,0 +1,58 @@
+/*
+ * Teleport
+ * Copyright (C) 2025 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+package readyz
+
+import "sync"
+
+// Reporter can be used by a service to report its status.
+type Reporter interface {
+ // Report the service's status.
+ Report(status Status)
+
+ // ReportReason reports the service's status including reason/description text.
+ ReportReason(status Status, reason string)
+}
+
+type reporter struct {
+ mu *sync.Mutex
+ status *ServiceStatus
+}
+
+func (r *reporter) Report(status Status) {
+ r.ReportReason(status, "")
+}
+
+func (r *reporter) ReportReason(status Status, reason string) {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+
+ r.status.Status = status
+ r.status.Reason = reason
+}
+
+// NoopReporter returns a no-op Reporter that can be used when no real reporter
+// is available (e.g. in tests).
+func NoopReporter() Reporter {
+ return noopReporter{}
+}
+
+type noopReporter struct{}
+
+func (noopReporter) Report(Status) {}
+func (noopReporter) ReportReason(Status, string) {}
diff --git a/lib/tbot/readyz/status.go b/lib/tbot/readyz/status.go
new file mode 100644
index 0000000000000..3d226c67023b6
--- /dev/null
+++ b/lib/tbot/readyz/status.go
@@ -0,0 +1,85 @@
+/*
+ * Teleport
+ * Copyright (C) 2025 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+package readyz
+
+import (
+ "encoding/json"
+ "net/http"
+)
+
+// Status describes the healthiness of a service or tbot overall.
+type Status uint
+
+const (
+ // Initializing means no status has been reported for the service.
+ Initializing Status = iota
+
+ // Healthy means the service is healthy and ready to serve traffic or it has
+ // recently succeeded generating an output.
+ Healthy
+
+ // Unhealthy means the service is failing to serve traffic or generate output.
+ Unhealthy
+)
+
+// String implements fmt.Stringer.
+func (s Status) String() string {
+ switch s {
+ case Initializing:
+ return "initializing"
+ case Healthy:
+ return "healthy"
+ case Unhealthy:
+ return "unhealthy"
+ default:
+ return ""
+ }
+}
+
+// MarshalJSON implements json.Marshaler.
+func (s Status) MarshalJSON() ([]byte, error) {
+ return json.Marshal(s.String())
+}
+
+// MarshalJSON implements json.Unmarshaler.
+func (s *Status) UnmarshalJSON(j []byte) error {
+ var str string
+ if err := json.Unmarshal(j, &str); err != nil {
+ return err
+ }
+ switch str {
+ case "healthy":
+ *s = Healthy
+ case "unhealthy":
+ *s = Unhealthy
+ default:
+ *s = Initializing
+ }
+ return nil
+}
+
+// HTTPStatusCode returns the HTTP response code that represents this status.
+func (s Status) HTTPStatusCode() int {
+ switch s {
+ case Healthy:
+ return http.StatusOK
+ default:
+ return http.StatusServiceUnavailable
+ }
+}
diff --git a/lib/tbot/service_application_output.go b/lib/tbot/service_application_output.go
index fe9c2a8b569d4..5ac03188955c2 100644
--- a/lib/tbot/service_application_output.go
+++ b/lib/tbot/service_application_output.go
@@ -33,6 +33,7 @@ import (
"github.com/gravitational/teleport/lib/reversetunnelclient"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
)
// ApplicationOutputService generates the artifacts necessary to connect to a
@@ -46,10 +47,14 @@ type ApplicationOutputService struct {
log *slog.Logger
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
+ statusReporter readyz.Reporter
}
func (s *ApplicationOutputService) String() string {
- return fmt.Sprintf("application-output (%s)", s.cfg.Destination.String())
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("application-output (%s)", s.cfg.Destination.String()),
+ )
}
func (s *ApplicationOutputService) OneShot(ctx context.Context) error {
@@ -69,6 +74,7 @@ func (s *ApplicationOutputService) Run(ctx context.Context) error {
log: s.log,
reloadCh: reloadCh,
identityReadyCh: s.botIdentityReadyCh,
+ statusReporter: s.statusReporter,
})
return trace.Wrap(err)
}
diff --git a/lib/tbot/service_application_tunnel.go b/lib/tbot/service_application_tunnel.go
index 854afcb5c5d0e..7b1115a0ecf37 100644
--- a/lib/tbot/service_application_tunnel.go
+++ b/lib/tbot/service_application_tunnel.go
@@ -35,6 +35,7 @@ import (
"github.com/gravitational/teleport/lib/srv/alpnproxy/common"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
"github.com/gravitational/teleport/lib/utils"
)
@@ -51,6 +52,7 @@ type ApplicationTunnelService struct {
botClient *apiclient.Client
getBotIdentity getBotIdentityFn
botIdentityReadyCh <-chan struct{}
+ statusReporter readyz.Reporter
}
func (s *ApplicationTunnelService) Run(ctx context.Context) error {
@@ -98,10 +100,16 @@ func (s *ApplicationTunnelService) Run(ctx context.Context) error {
}()
s.log.InfoContext(ctx, "Listening for connections.", "address", l.Addr().String())
+ if s.statusReporter == nil {
+ s.statusReporter = readyz.NoopReporter()
+ }
+ s.statusReporter.Report(readyz.Healthy)
+
select {
case <-ctx.Done():
return nil
case err := <-errCh:
+ s.statusReporter.ReportReason(readyz.Unhealthy, err.Error())
return trace.Wrap(err, "local proxy failed")
}
}
@@ -262,5 +270,8 @@ func (s *ApplicationTunnelService) issueCert(
// String returns a human-readable string that can uniquely identify the
// service.
func (s *ApplicationTunnelService) String() string {
- return fmt.Sprintf("%s:%s:%s", config.ApplicationTunnelServiceType, s.cfg.Listen, s.cfg.AppName)
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("%s:%s:%s", config.ApplicationTunnelServiceType, s.cfg.Listen, s.cfg.AppName),
+ )
}
diff --git a/lib/tbot/service_bot_identity.go b/lib/tbot/service_bot_identity.go
index 96745b6f24224..5828cc9058fbd 100644
--- a/lib/tbot/service_bot_identity.go
+++ b/lib/tbot/service_bot_identity.go
@@ -43,6 +43,7 @@ import (
"github.com/gravitational/teleport/lib/tbot/bot"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
"github.com/gravitational/teleport/lib/utils"
)
@@ -58,6 +59,7 @@ type identityService struct {
reloadBroadcaster *channelBroadcaster
cfg *config.BotConfig
resolver reversetunnelclient.Resolver
+ statusReporter readyz.Reporter
mu sync.Mutex
client *apiclient.Client
@@ -291,10 +293,13 @@ func (s *identityService) Initialize(ctx context.Context) error {
s.mu.Lock()
s.client = c
s.facade = facade
+ if s.statusReporter == nil {
+ s.statusReporter = readyz.NoopReporter()
+ }
s.mu.Unlock()
s.unblockWaiters()
-
+ s.statusReporter.Report(readyz.Healthy)
s.log.InfoContext(ctx, "Identity initialized successfully")
return nil
}
@@ -371,6 +376,7 @@ func (s *identityService) Run(ctx context.Context) error {
log: s.log,
reloadCh: reloadCh,
waitBeforeFirstRun: true,
+ statusReporter: s.statusReporter,
})
return trace.Wrap(err)
}
diff --git a/lib/tbot/service_ca_rotation.go b/lib/tbot/service_ca_rotation.go
index 462de4d37e59e..4684387806fca 100644
--- a/lib/tbot/service_ca_rotation.go
+++ b/lib/tbot/service_ca_rotation.go
@@ -34,6 +34,7 @@ import (
apiclient "github.com/gravitational/teleport/api/client"
"github.com/gravitational/teleport/api/types"
"github.com/gravitational/teleport/api/utils/retryutils"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
)
// debouncer accepts a duration, and a function. When `attempt` is called on
@@ -132,6 +133,7 @@ type caRotationService struct {
botClient *apiclient.Client
getBotIdentity getBotIdentityFn
botIdentityReadyCh <-chan struct{}
+ statusReporter readyz.Reporter
}
func (s *caRotationService) String() string {
@@ -144,6 +146,10 @@ func (s *caRotationService) String() string {
// Run also manages debouncing the renewals across multiple watch
// attempts.
func (s *caRotationService) Run(ctx context.Context) error {
+ if s.statusReporter == nil {
+ s.statusReporter = readyz.NoopReporter()
+ }
+
rd := debouncer{
f: s.reloadBroadcaster.broadcast,
debouncePeriod: time.Second * 10,
@@ -183,6 +189,7 @@ func (s *caRotationService) Run(ctx context.Context) error {
if isCancelledErr {
s.log.DebugContext(ctx, "CA watcher detected client closing. Waiting to rewatch", "wait", backoffPeriod)
} else if err != nil {
+ s.statusReporter.Report(readyz.Unhealthy)
s.log.ErrorContext(ctx, "Error occurred whilst watching CA rotations. Waiting to retry", "wait", backoffPeriod, "error", err)
}
@@ -224,6 +231,7 @@ func (s *caRotationService) watchCARotations(ctx context.Context, queueReload fu
// OpInit is a special case omitted by the Watcher when the
// connection succeeds.
if event.Type == types.OpInit {
+ s.statusReporter.Report(readyz.Healthy)
s.log.InfoContext(ctx, "Started watching for CA rotations")
continue
}
diff --git a/lib/tbot/service_client_credential.go b/lib/tbot/service_client_credential.go
index 01dd60e1448ed..7617a1d607e29 100644
--- a/lib/tbot/service_client_credential.go
+++ b/lib/tbot/service_client_credential.go
@@ -19,6 +19,7 @@
package tbot
import (
+ "cmp"
"context"
"log/slog"
@@ -26,6 +27,7 @@ import (
apiclient "github.com/gravitational/teleport/api/client"
"github.com/gravitational/teleport/lib/tbot/config"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
)
// ClientCredentialOutputService produces credentials which can be used to
@@ -41,10 +43,14 @@ type ClientCredentialOutputService struct {
getBotIdentity getBotIdentityFn
log *slog.Logger
reloadBroadcaster *channelBroadcaster
+ statusReporter readyz.Reporter
}
func (s *ClientCredentialOutputService) String() string {
- return "client-credential-output"
+ return cmp.Or(
+ s.cfg.Name,
+ "client-credential-output",
+ )
}
func (s *ClientCredentialOutputService) OneShot(ctx context.Context) error {
@@ -64,6 +70,7 @@ func (s *ClientCredentialOutputService) Run(ctx context.Context) error {
log: s.log,
reloadCh: reloadCh,
identityReadyCh: s.botIdentityReadyCh,
+ statusReporter: s.statusReporter,
})
return trace.Wrap(err)
}
diff --git a/lib/tbot/service_database_output.go b/lib/tbot/service_database_output.go
index c9b95a869b6f5..870b63e41c4ee 100644
--- a/lib/tbot/service_database_output.go
+++ b/lib/tbot/service_database_output.go
@@ -34,6 +34,7 @@ import (
"github.com/gravitational/teleport/lib/tbot/bot"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
)
// DatabaseOutputService generates the artifacts necessary to connect to a
@@ -47,10 +48,14 @@ type DatabaseOutputService struct {
log *slog.Logger
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
+ statusReporter readyz.Reporter
}
func (s *DatabaseOutputService) String() string {
- return fmt.Sprintf("database-output (%s)", s.cfg.Destination.String())
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("database-output (%s)", s.cfg.Destination.String()),
+ )
}
func (s *DatabaseOutputService) OneShot(ctx context.Context) error {
@@ -70,6 +75,7 @@ func (s *DatabaseOutputService) Run(ctx context.Context) error {
log: s.log,
reloadCh: reloadCh,
identityReadyCh: s.botIdentityReadyCh,
+ statusReporter: s.statusReporter,
})
return trace.Wrap(err)
}
diff --git a/lib/tbot/service_database_tunnel.go b/lib/tbot/service_database_tunnel.go
index 9581b43974272..0f7fe878202fe 100644
--- a/lib/tbot/service_database_tunnel.go
+++ b/lib/tbot/service_database_tunnel.go
@@ -34,6 +34,7 @@ import (
"github.com/gravitational/teleport/lib/srv/alpnproxy/common"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
"github.com/gravitational/teleport/lib/tlsca"
"github.com/gravitational/teleport/lib/utils"
)
@@ -71,6 +72,7 @@ type DatabaseTunnelService struct {
botClient *apiclient.Client
getBotIdentity getBotIdentityFn
botIdentityReadyCh <-chan struct{}
+ statusReporter readyz.Reporter
}
// buildLocalProxyConfig initializes the service, fetching any initial information and setting
@@ -234,10 +236,16 @@ func (s *DatabaseTunnelService) Run(ctx context.Context) error {
}()
s.log.InfoContext(ctx, "Listening for connections.", "address", l.Addr().String())
+ if s.statusReporter == nil {
+ s.statusReporter = readyz.NoopReporter()
+ }
+ s.statusReporter.Report(readyz.Healthy)
+
select {
case <-ctx.Done():
return nil
case err := <-errCh:
+ s.statusReporter.ReportReason(readyz.Unhealthy, err.Error())
return trace.Wrap(err, "local proxy failed")
}
}
@@ -309,5 +317,8 @@ func (s *DatabaseTunnelService) issueCert(
// String returns a human-readable string that can uniquely identify the
// service.
func (s *DatabaseTunnelService) String() string {
- return fmt.Sprintf("%s:%s:%s", config.DatabaseTunnelServiceType, s.cfg.Listen, s.cfg.Service)
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("%s:%s:%s", config.DatabaseTunnelServiceType, s.cfg.Listen, s.cfg.Service),
+ )
}
diff --git a/lib/tbot/service_diagnostics.go b/lib/tbot/service_diagnostics.go
index 2a78e684bbd0a..d941ca5d1eeff 100644
--- a/lib/tbot/service_diagnostics.go
+++ b/lib/tbot/service_diagnostics.go
@@ -29,14 +29,16 @@ import (
apidefaults "github.com/gravitational/teleport/api/defaults"
"github.com/gravitational/teleport/lib/defaults"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
)
// diagnosticsService is a [bot.Service] that exposes diagnostics endpoints.
// It's only started when a --diag-addr is provided.
type diagnosticsService struct {
- log *slog.Logger
- diagAddr string
- pprofEnabled bool
+ log *slog.Logger
+ diagAddr string
+ pprofEnabled bool
+ statusRegistry *readyz.Registry
}
func (s *diagnosticsService) String() string {
@@ -69,13 +71,8 @@ func (s *diagnosticsService) Run(ctx context.Context) error {
w.WriteHeader(http.StatusOK)
_, _ = w.Write([]byte("OK"))
}))
- mux.Handle("/readyz", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- // TODO(noah): Eventually this should diverge from /livez and report
- // the readiness status from each sub-service, with an error status if
- // any of them are not ready.
- w.WriteHeader(http.StatusOK)
- _, _ = w.Write([]byte("OK"))
- }))
+ mux.Handle("/readyz", readyz.HTTPHandler(s.statusRegistry))
+ mux.Handle("/readyz/", readyz.HTTPHandler(s.statusRegistry))
srv := http.Server{
Addr: s.diagAddr,
Handler: mux,
diff --git a/lib/tbot/service_example.go b/lib/tbot/service_example.go
index c550c52d9f24f..290e612aedcc0 100644
--- a/lib/tbot/service_example.go
+++ b/lib/tbot/service_example.go
@@ -19,6 +19,7 @@
package tbot
import (
+ "cmp"
"context"
"fmt"
"time"
@@ -46,5 +47,8 @@ func (s *ExampleService) Run(ctx context.Context) error {
}
func (s *ExampleService) String() string {
- return fmt.Sprintf("%s:%s", config.ExampleServiceType, s.Message)
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("%s:%s", config.ExampleServiceType, s.Message),
+ )
}
diff --git a/lib/tbot/service_heartbeat.go b/lib/tbot/service_heartbeat.go
index 335f593ef1fe5..7047711b8b337 100644
--- a/lib/tbot/service_heartbeat.go
+++ b/lib/tbot/service_heartbeat.go
@@ -33,6 +33,7 @@ import (
"github.com/gravitational/teleport"
machineidv1pb "github.com/gravitational/teleport/api/gen/proto/go/teleport/machineid/v1"
"github.com/gravitational/teleport/lib/tbot/config"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
)
type heartbeatSubmitter interface {
@@ -50,6 +51,7 @@ type heartbeatService struct {
botIdentityReadyCh <-chan struct{}
interval time.Duration
retryLimit int
+ statusReporter readyz.Reporter
}
func (s *heartbeatService) heartbeat(ctx context.Context, isStartup bool) error {
@@ -119,6 +121,7 @@ func (s *heartbeatService) Run(ctx context.Context) error {
return nil
},
identityReadyCh: s.botIdentityReadyCh,
+ statusReporter: s.statusReporter,
})
return trace.Wrap(err)
}
diff --git a/lib/tbot/service_identity_output.go b/lib/tbot/service_identity_output.go
index 08228470b63a4..3129ae0e36cba 100644
--- a/lib/tbot/service_identity_output.go
+++ b/lib/tbot/service_identity_output.go
@@ -38,6 +38,7 @@ import (
"github.com/gravitational/teleport/lib/tbot/bot"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
"github.com/gravitational/teleport/lib/tbot/ssh"
"github.com/gravitational/teleport/lib/utils"
)
@@ -57,6 +58,7 @@ type IdentityOutputService struct {
proxyPingCache *proxyPingCache
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
+ statusReporter readyz.Reporter
// executablePath is called to get the path to the tbot executable.
// Usually this is os.Executable
executablePath func() (string, error)
@@ -64,7 +66,10 @@ type IdentityOutputService struct {
}
func (s *IdentityOutputService) String() string {
- return fmt.Sprintf("identity-output (%s)", s.cfg.Destination.String())
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("identity-output (%s)", s.cfg.Destination.String()),
+ )
}
func (s *IdentityOutputService) OneShot(ctx context.Context) error {
@@ -84,6 +89,7 @@ func (s *IdentityOutputService) Run(ctx context.Context) error {
log: s.log,
reloadCh: reloadCh,
identityReadyCh: s.botIdentityReadyCh,
+ statusReporter: s.statusReporter,
})
return trace.Wrap(err)
}
diff --git a/lib/tbot/service_kubernetes_output.go b/lib/tbot/service_kubernetes_output.go
index 4342ceda26a80..49332c85cc3ff 100644
--- a/lib/tbot/service_kubernetes_output.go
+++ b/lib/tbot/service_kubernetes_output.go
@@ -42,6 +42,7 @@ import (
"github.com/gravitational/teleport/lib/reversetunnelclient"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
logutils "github.com/gravitational/teleport/lib/utils/log"
)
@@ -62,13 +63,17 @@ type KubernetesOutputService struct {
proxyPingCache *proxyPingCache
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
+ statusReporter readyz.Reporter
// executablePath is called to get the path to the tbot executable.
// Usually this is os.Executable
executablePath func() (string, error)
}
func (s *KubernetesOutputService) String() string {
- return fmt.Sprintf("kubernetes-output (%s)", s.cfg.Destination.String())
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("kubernetes-output (%s)", s.cfg.Destination.String()),
+ )
}
func (s *KubernetesOutputService) OneShot(ctx context.Context) error {
@@ -88,6 +93,7 @@ func (s *KubernetesOutputService) Run(ctx context.Context) error {
log: s.log,
reloadCh: reloadCh,
identityReadyCh: s.botIdentityReadyCh,
+ statusReporter: s.statusReporter,
})
return trace.Wrap(err)
}
diff --git a/lib/tbot/service_kubernetes_v2_output.go b/lib/tbot/service_kubernetes_v2_output.go
index a045331aea601..9e0bd2ccf3da9 100644
--- a/lib/tbot/service_kubernetes_v2_output.go
+++ b/lib/tbot/service_kubernetes_v2_output.go
@@ -43,6 +43,7 @@ import (
"github.com/gravitational/teleport/lib/reversetunnelclient"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
logutils "github.com/gravitational/teleport/lib/utils/log"
)
@@ -61,13 +62,17 @@ type KubernetesV2OutputService struct {
proxyPingCache *proxyPingCache
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
+ statusReporter readyz.Reporter
// executablePath is called to get the path to the tbot executable.
// Usually this is os.Executable
executablePath func() (string, error)
}
func (s *KubernetesV2OutputService) String() string {
- return fmt.Sprintf("kubernetes-v2-output (%s)", s.cfg.Destination.String())
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("kubernetes-v2-output (%s)", s.cfg.Destination.String()),
+ )
}
func (s *KubernetesV2OutputService) OneShot(ctx context.Context) error {
@@ -87,6 +92,7 @@ func (s *KubernetesV2OutputService) Run(ctx context.Context) error {
log: s.log,
reloadCh: reloadCh,
identityReadyCh: s.botIdentityReadyCh,
+ statusReporter: s.statusReporter,
}))
}
diff --git a/lib/tbot/service_spiffe_svid_output.go b/lib/tbot/service_spiffe_svid_output.go
index e13bf3db16b42..040607e2a0808 100644
--- a/lib/tbot/service_spiffe_svid_output.go
+++ b/lib/tbot/service_spiffe_svid_output.go
@@ -39,6 +39,7 @@ import (
"github.com/gravitational/teleport/lib/reversetunnelclient"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
"github.com/gravitational/teleport/lib/tbot/workloadidentity"
)
@@ -59,13 +60,17 @@ type SPIFFESVIDOutputService struct {
getBotIdentity getBotIdentityFn
log *slog.Logger
resolver reversetunnelclient.Resolver
+ statusReporter readyz.Reporter
// trustBundleCache is the cache of trust bundles. It only needs to be
// provided when running in daemon mode.
trustBundleCache *workloadidentity.TrustBundleCache
}
func (s *SPIFFESVIDOutputService) String() string {
- return fmt.Sprintf("spiffe-svid-output (%s)", s.cfg.Destination.String())
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("spiffe-svid-output (%s)", s.cfg.Destination.String()),
+ )
}
func (s *SPIFFESVIDOutputService) OneShot(ctx context.Context) error {
@@ -94,6 +99,10 @@ func (s *SPIFFESVIDOutputService) Run(ctx context.Context) error {
return trace.Wrap(err, "getting trust bundle set")
}
+ if s.statusReporter == nil {
+ s.statusReporter = readyz.NoopReporter()
+ }
+
jitter := retryutils.DefaultJitter
var res *machineidv1pb.SignX509SVIDsResponse
var privateKey crypto.Signer
@@ -104,10 +113,8 @@ func (s *SPIFFESVIDOutputService) Run(ctx context.Context) error {
for {
var retryAfter <-chan time.Time
if failures > 0 {
- backoffTime := time.Second * time.Duration(math.Pow(2, float64(failures-1)))
- if backoffTime > time.Minute {
- backoffTime = time.Minute
- }
+ s.statusReporter.Report(readyz.Unhealthy)
+ backoffTime := min(time.Second*time.Duration(math.Pow(2, float64(failures-1))), time.Minute)
backoffTime = jitter(backoffTime)
s.log.WarnContext(
ctx,
@@ -156,6 +163,7 @@ func (s *SPIFFESVIDOutputService) Run(ctx context.Context) error {
failures++
continue
}
+ s.statusReporter.Report(readyz.Healthy)
failures = 0
}
}
diff --git a/lib/tbot/service_spiffe_workload_api.go b/lib/tbot/service_spiffe_workload_api.go
index 18adfafa0d49d..a71e2727a987c 100644
--- a/lib/tbot/service_spiffe_workload_api.go
+++ b/lib/tbot/service_spiffe_workload_api.go
@@ -57,6 +57,7 @@ import (
"github.com/gravitational/teleport/lib/observability/metrics"
"github.com/gravitational/teleport/lib/reversetunnelclient"
"github.com/gravitational/teleport/lib/tbot/config"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
"github.com/gravitational/teleport/lib/tbot/workloadidentity"
"github.com/gravitational/teleport/lib/tbot/workloadidentity/attrs"
"github.com/gravitational/teleport/lib/tbot/workloadidentity/workloadattest"
@@ -81,6 +82,7 @@ type SPIFFEWorkloadAPIService struct {
log *slog.Logger
resolver reversetunnelclient.Resolver
trustBundleCache *workloadidentity.TrustBundleCache
+ statusReporter readyz.Reporter
// client holds the impersonated client for the service
client *apiclient.Client
@@ -133,6 +135,10 @@ func (s *SPIFFEWorkloadAPIService) setup(ctx context.Context) (err error) {
return trace.Wrap(err, "setting up workload attestation")
}
+ if s.statusReporter == nil {
+ s.statusReporter = readyz.NoopReporter()
+ }
+
return nil
}
@@ -283,7 +289,12 @@ func (s *SPIFFEWorkloadAPIService) Run(ctx context.Context) error {
return nil
})
- return trace.Wrap(eg.Wait())
+ s.statusReporter.Report(readyz.Healthy)
+ if err := eg.Wait(); err != nil {
+ s.statusReporter.ReportReason(readyz.Unhealthy, err.Error())
+ return trace.Wrap(eg.Wait())
+ }
+ return nil
}
// serialString returns a human-readable colon-separated string of the serial
@@ -893,5 +904,8 @@ func (s *SPIFFEWorkloadAPIService) ValidateJWTSVID(
// String returns a human-readable string that can uniquely identify the
// service.
func (s *SPIFFEWorkloadAPIService) String() string {
- return fmt.Sprintf("%s:%s", config.SPIFFEWorkloadAPIServiceType, s.cfg.Listen)
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("%s:%s", config.SPIFFEWorkloadAPIServiceType, s.cfg.Listen),
+ )
}
diff --git a/lib/tbot/service_ssh_host_output.go b/lib/tbot/service_ssh_host_output.go
index 3b219e5f3b769..f5df49f139365 100644
--- a/lib/tbot/service_ssh_host_output.go
+++ b/lib/tbot/service_ssh_host_output.go
@@ -39,6 +39,7 @@ import (
"github.com/gravitational/teleport/lib/sshutils"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
)
type SSHHostOutputService struct {
@@ -50,10 +51,14 @@ type SSHHostOutputService struct {
log *slog.Logger
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
+ statusReporter readyz.Reporter
}
func (s *SSHHostOutputService) String() string {
- return fmt.Sprintf("ssh-host (%s)", s.cfg.Destination.String())
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("ssh-host (%s)", s.cfg.Destination.String()),
+ )
}
func (s *SSHHostOutputService) OneShot(ctx context.Context) error {
@@ -73,6 +78,7 @@ func (s *SSHHostOutputService) Run(ctx context.Context) error {
log: s.log,
reloadCh: reloadCh,
identityReadyCh: s.botIdentityReadyCh,
+ statusReporter: s.statusReporter,
})
return trace.Wrap(err)
}
diff --git a/lib/tbot/service_ssh_multiplexer.go b/lib/tbot/service_ssh_multiplexer.go
index f3157e64ebe5f..1584356304f88 100644
--- a/lib/tbot/service_ssh_multiplexer.go
+++ b/lib/tbot/service_ssh_multiplexer.go
@@ -57,6 +57,7 @@ import (
"github.com/gravitational/teleport/lib/tbot/bot"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
"github.com/gravitational/teleport/lib/tbot/ssh"
"github.com/gravitational/teleport/lib/utils"
"github.com/gravitational/teleport/lib/utils/uds"
@@ -105,6 +106,7 @@ type SSHMultiplexerService struct {
proxyPingCache *proxyPingCache
reloadBroadcaster *channelBroadcaster
resolver reversetunnelclient.Resolver
+ statusReporter readyz.Reporter
// Fields below here are initialized by the service itself on startup.
identity *identity.Facade
@@ -563,7 +565,16 @@ func (s *SSHMultiplexerService) Run(ctx context.Context) (err error) {
return s.identityRenewalLoop(egCtx, proxyHost, authClient)
})
- return eg.Wait()
+ if s.statusReporter == nil {
+ s.statusReporter = readyz.NoopReporter()
+ }
+ s.statusReporter.Report(readyz.Healthy)
+
+ if err := eg.Wait(); err != nil {
+ s.statusReporter.ReportReason(readyz.Unhealthy, err.Error())
+ return err
+ }
+ return nil
}
func (s *SSHMultiplexerService) handleConn(
@@ -796,7 +807,10 @@ func (s *SSHMultiplexerService) handleConn(
}
func (s *SSHMultiplexerService) String() string {
- return config.SSHMultiplexerServiceType
+ return cmp.Or(
+ s.cfg.Name,
+ config.SSHMultiplexerServiceType,
+ )
}
type hostDialer interface {
diff --git a/lib/tbot/service_workload_identity_api.go b/lib/tbot/service_workload_identity_api.go
index a617cbea28b0e..f71e6cc940e5d 100644
--- a/lib/tbot/service_workload_identity_api.go
+++ b/lib/tbot/service_workload_identity_api.go
@@ -48,6 +48,7 @@ import (
"github.com/gravitational/teleport/lib/observability/metrics"
"github.com/gravitational/teleport/lib/reversetunnelclient"
"github.com/gravitational/teleport/lib/tbot/config"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
"github.com/gravitational/teleport/lib/tbot/workloadidentity"
"github.com/gravitational/teleport/lib/tbot/workloadidentity/attrs"
"github.com/gravitational/teleport/lib/tbot/workloadidentity/workloadattest"
@@ -73,6 +74,7 @@ type WorkloadIdentityAPIService struct {
resolver reversetunnelclient.Resolver
trustBundleCache *workloadidentity.TrustBundleCache
crlCache *workloadidentity.CRLCache
+ statusReporter readyz.Reporter
// client holds the impersonated client for the service
client *apiclient.Client
@@ -125,6 +127,10 @@ func (s *WorkloadIdentityAPIService) setup(ctx context.Context) (err error) {
return trace.Wrap(err, "setting up workload attestation")
}
+ if s.statusReporter == nil {
+ s.statusReporter = readyz.NoopReporter()
+ }
+
return nil
}
@@ -220,7 +226,12 @@ func (s *WorkloadIdentityAPIService) Run(ctx context.Context) error {
return nil
})
- return trace.Wrap(eg.Wait())
+ s.statusReporter.Report(readyz.Healthy)
+ if err := eg.Wait(); err != nil {
+ s.statusReporter.ReportReason(readyz.Unhealthy, err.Error())
+ return trace.Wrap(err)
+ }
+ return nil
}
func (s *WorkloadIdentityAPIService) authenticateClient(
@@ -698,5 +709,8 @@ func (s *WorkloadIdentityAPIService) ValidateJWTSVID(
// String returns a human-readable string that can uniquely identify the
// service.
func (s *WorkloadIdentityAPIService) String() string {
- return fmt.Sprintf("%s:%s", config.WorkloadIdentityAPIServiceType, s.cfg.Listen)
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("%s:%s", config.WorkloadIdentityAPIServiceType, s.cfg.Listen),
+ )
}
diff --git a/lib/tbot/service_workload_identity_aws_ra.go b/lib/tbot/service_workload_identity_aws_ra.go
index c91286540a9ec..e1daf83cd09f6 100644
--- a/lib/tbot/service_workload_identity_aws_ra.go
+++ b/lib/tbot/service_workload_identity_aws_ra.go
@@ -38,6 +38,7 @@ import (
"github.com/gravitational/teleport/lib/tbot/bot"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
"github.com/gravitational/teleport/lib/tbot/workloadidentity"
)
@@ -52,11 +53,15 @@ type WorkloadIdentityAWSRAService struct {
log *slog.Logger
resolver reversetunnelclient.Resolver
reloadBroadcaster *channelBroadcaster
+ statusReporter readyz.Reporter
}
// String returns a human-readable description of the service.
func (s *WorkloadIdentityAWSRAService) String() string {
- return fmt.Sprintf("workload-identity-aws-roles-anywhere (%s)", s.cfg.Destination.String())
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("workload-identity-aws-roles-anywhere (%s)", s.cfg.Destination.String()),
+ )
}
// OneShot runs the service once, generating the output and writing it to the
@@ -80,6 +85,7 @@ func (s *WorkloadIdentityAWSRAService) Run(ctx context.Context) error {
log: s.log,
reloadCh: reloadCh,
identityReadyCh: s.botIdentityReadyCh,
+ statusReporter: s.statusReporter,
})
return trace.Wrap(err)
}
diff --git a/lib/tbot/service_workload_identity_jwt.go b/lib/tbot/service_workload_identity_jwt.go
index ad5c3ec3d0789..f2e0e5b585cf2 100644
--- a/lib/tbot/service_workload_identity_jwt.go
+++ b/lib/tbot/service_workload_identity_jwt.go
@@ -32,6 +32,7 @@ import (
"github.com/gravitational/teleport/lib/reversetunnelclient"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
"github.com/gravitational/teleport/lib/tbot/workloadidentity"
)
@@ -44,6 +45,7 @@ type WorkloadIdentityJWTService struct {
getBotIdentity getBotIdentityFn
log *slog.Logger
resolver reversetunnelclient.Resolver
+ statusReporter readyz.Reporter
// trustBundleCache is the cache of trust bundles. It only needs to be
// provided when running in daemon mode.
trustBundleCache *workloadidentity.TrustBundleCache
@@ -51,7 +53,10 @@ type WorkloadIdentityJWTService struct {
// String returns a human-readable description of the service.
func (s *WorkloadIdentityJWTService) String() string {
- return fmt.Sprintf("workload-identity-jwt (%s)", s.cfg.Destination.String())
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("workload-identity-jwt (%s)", s.cfg.Destination.String()),
+ )
}
// OneShot runs the service once, generating the output and writing it to the
@@ -72,6 +77,10 @@ func (s *WorkloadIdentityJWTService) Run(ctx context.Context) error {
return trace.Wrap(err, "getting trust bundle set")
}
+ if s.statusReporter == nil {
+ s.statusReporter = readyz.NoopReporter()
+ }
+
jitter := retryutils.DefaultJitter
var cred *workloadidentityv1pb.Credential
var failures int
@@ -80,10 +89,8 @@ func (s *WorkloadIdentityJWTService) Run(ctx context.Context) error {
for {
var retryAfter <-chan time.Time
if failures > 0 {
- backoffTime := time.Second * time.Duration(math.Pow(2, float64(failures-1)))
- if backoffTime > time.Minute {
- backoffTime = time.Minute
- }
+ s.statusReporter.Report(readyz.Unhealthy)
+ backoffTime := min(time.Second*time.Duration(math.Pow(2, float64(failures-1))), time.Minute)
backoffTime = jitter(backoffTime)
s.log.WarnContext(
ctx,
@@ -132,6 +139,7 @@ func (s *WorkloadIdentityJWTService) Run(ctx context.Context) error {
failures++
continue
}
+ s.statusReporter.Report(readyz.Healthy)
failures = 0
}
}
diff --git a/lib/tbot/service_workload_identity_x509.go b/lib/tbot/service_workload_identity_x509.go
index b956c6bf3b184..21c9ef024858e 100644
--- a/lib/tbot/service_workload_identity_x509.go
+++ b/lib/tbot/service_workload_identity_x509.go
@@ -36,6 +36,7 @@ import (
"github.com/gravitational/teleport/lib/reversetunnelclient"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
"github.com/gravitational/teleport/lib/tbot/workloadidentity"
)
@@ -48,6 +49,7 @@ type WorkloadIdentityX509Service struct {
getBotIdentity getBotIdentityFn
log *slog.Logger
resolver reversetunnelclient.Resolver
+ statusReporter readyz.Reporter
// trustBundleCache is the cache of trust bundles. It only needs to be
// provided when running in daemon mode.
trustBundleCache *workloadidentity.TrustBundleCache
@@ -56,7 +58,10 @@ type WorkloadIdentityX509Service struct {
// String returns a human-readable description of the service.
func (s *WorkloadIdentityX509Service) String() string {
- return fmt.Sprintf("workload-identity-x509 (%s)", s.cfg.Destination.String())
+ return cmp.Or(
+ s.cfg.Name,
+ fmt.Sprintf("workload-identity-x509 (%s)", s.cfg.Destination.String()),
+ )
}
// OneShot runs the service once, generating the output and writing it to the
@@ -100,6 +105,10 @@ func (s *WorkloadIdentityX509Service) Run(ctx context.Context) error {
return trace.Wrap(err, "getting CRL set from cache")
}
+ if s.statusReporter == nil {
+ s.statusReporter = readyz.NoopReporter()
+ }
+
jitter := retryutils.DefaultJitter
var x509Cred *workloadidentityv1pb.Credential
var privateKey crypto.Signer
@@ -109,10 +118,8 @@ func (s *WorkloadIdentityX509Service) Run(ctx context.Context) error {
for {
var retryAfter <-chan time.Time
if failures > 0 {
- backoffTime := time.Second * time.Duration(math.Pow(2, float64(failures-1)))
- if backoffTime > time.Minute {
- backoffTime = time.Minute
- }
+ s.statusReporter.Report(readyz.Unhealthy)
+ backoffTime := min(time.Second*time.Duration(math.Pow(2, float64(failures-1))), time.Minute)
backoffTime = jitter(backoffTime)
s.log.WarnContext(
ctx,
@@ -170,6 +177,7 @@ func (s *WorkloadIdentityX509Service) Run(ctx context.Context) error {
failures++
continue
}
+ s.statusReporter.Report(readyz.Healthy)
failures = 0
}
}
diff --git a/lib/tbot/tbot.go b/lib/tbot/tbot.go
index 5ed5e31d28422..8c8dc9472a43f 100644
--- a/lib/tbot/tbot.go
+++ b/lib/tbot/tbot.go
@@ -48,6 +48,7 @@ import (
"github.com/gravitational/teleport/lib/tbot/client"
"github.com/gravitational/teleport/lib/tbot/config"
"github.com/gravitational/teleport/lib/tbot/identity"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
"github.com/gravitational/teleport/lib/tbot/workloadidentity"
"github.com/gravitational/teleport/lib/utils"
)
@@ -226,6 +227,8 @@ func (b *Bot) Run(ctx context.Context) (err error) {
}()
}
+ statusRegistry := readyz.NewRegistry()
+
b.mu.Lock()
b.botIdentitySvc = &identityService{
cfg: b.cfg,
@@ -234,6 +237,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
log: b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "identity"),
),
+ statusReporter: statusRegistry.AddService("identity"),
}
b.mu.Unlock()
@@ -270,8 +274,9 @@ func (b *Bot) Run(ctx context.Context) (err error) {
// Setup all other services
if b.cfg.DiagAddr != "" {
services = append(services, &diagnosticsService{
- diagAddr: b.cfg.DiagAddr,
- pprofEnabled: b.cfg.Debug,
+ diagAddr: b.cfg.DiagAddr,
+ pprofEnabled: b.cfg.Debug,
+ statusRegistry: statusRegistry,
log: b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "diagnostics"),
),
@@ -291,6 +296,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
botIdentityReadyCh: b.botIdentitySvc.Ready(),
interval: time.Minute * 30,
retryLimit: 5,
+ statusReporter: statusRegistry.AddService("heartbeat"),
})
services = append(services, &caRotationService{
@@ -301,6 +307,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
teleport.ComponentKey, teleport.Component(componentTBot, "ca-rotation"),
),
reloadBroadcaster: reloadBroadcaster,
+ statusReporter: statusRegistry.AddService("ca-rotation"),
})
// We only want to create this service if it's needed by a dependent
@@ -321,6 +328,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
Logger: b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "spiffe-trust-bundle-cache"),
),
+ StatusReporter: statusRegistry.AddService("spiffe-trust-bundle-cache"),
})
if err != nil {
return nil, trace.Wrap(err)
@@ -340,6 +348,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
Logger: b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "crl-cache"),
),
+ StatusReporter: statusRegistry.AddService("crl-cache"),
})
if err != nil {
return nil, trace.Wrap(err)
@@ -384,6 +393,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
services = append(services, svc)
case *config.DatabaseTunnelService:
svc := &DatabaseTunnelService{
@@ -398,6 +408,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
services = append(services, svc)
case *config.ExampleService:
services = append(services, &ExampleService{
@@ -418,6 +429,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
services = append(services, svc)
case *config.KubernetesOutput:
svc := &KubernetesOutputService{
@@ -434,6 +446,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
services = append(services, svc)
case *config.KubernetesV2Output:
svc := &KubernetesV2OutputService{
@@ -450,6 +463,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
services = append(services, svc)
case *config.SPIFFESVIDOutput:
svc := &SPIFFESVIDOutputService{
@@ -462,6 +476,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
if !b.cfg.Oneshot {
tbCache, err := setupTrustBundleCache()
if err != nil {
@@ -483,6 +498,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
services = append(services, svc)
case *config.ApplicationOutput:
svc := &ApplicationOutputService{
@@ -497,6 +513,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
services = append(services, svc)
case *config.DatabaseOutput:
svc := &DatabaseOutputService{
@@ -511,6 +528,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
services = append(services, svc)
case *config.IdentityOutput:
svc := &IdentityOutputService{
@@ -528,6 +546,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
services = append(services, svc)
case *config.UnstableClientCredentialOutput:
svc := &ClientCredentialOutputService{
@@ -541,6 +560,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
services = append(services, svc)
case *config.ApplicationTunnelService:
svc := &ApplicationTunnelService{
@@ -555,6 +575,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
services = append(services, svc)
case *config.WorkloadIdentityX509Service:
svc := &WorkloadIdentityX509Service{
@@ -567,6 +588,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
if !b.cfg.Oneshot {
tbCache, err := setupTrustBundleCache()
if err != nil {
@@ -591,6 +613,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
if !b.cfg.Oneshot {
tbCache, err := setupTrustBundleCache()
if err != nil {
@@ -636,6 +659,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
services = append(services, svc)
case *config.WorkloadIdentityAWSRAService:
svc := &WorkloadIdentityAWSRAService{
@@ -650,6 +674,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
svc.log = b.log.With(
teleport.ComponentKey, teleport.Component(componentTBot, "svc", svc.String()),
)
+ svc.statusReporter = statusRegistry.AddService(svc.String())
services = append(services, svc)
default:
return trace.BadParameter("unknown service type: %T", svcCfg)
diff --git a/lib/tbot/workloadidentity/crl_cache.go b/lib/tbot/workloadidentity/crl_cache.go
index a5f34165bdb22..3d994ec82bc8f 100644
--- a/lib/tbot/workloadidentity/crl_cache.go
+++ b/lib/tbot/workloadidentity/crl_cache.go
@@ -29,6 +29,7 @@ import (
"github.com/gravitational/trace"
workloadidentityv1pb "github.com/gravitational/teleport/api/gen/proto/go/teleport/workloadidentity/v1"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
)
// CRLSet is a collection of CRLs.
@@ -72,6 +73,7 @@ type CRLCache struct {
revocationsClient workloadidentityv1pb.WorkloadIdentityRevocationServiceClient
logger *slog.Logger
botIdentityReadyCh <-chan struct{}
+ statusReporter readyz.Reporter
mu sync.Mutex
crlSet *CRLSet
@@ -84,6 +86,7 @@ type CRLCacheConfig struct {
RevocationsClient workloadidentityv1pb.WorkloadIdentityRevocationServiceClient
Logger *slog.Logger
BotIdentityReadyCh <-chan struct{}
+ StatusReporter readyz.Reporter
}
// NewCRLCache creates a new CRLCache.
@@ -94,10 +97,14 @@ func NewCRLCache(cfg CRLCacheConfig) (*CRLCache, error) {
case cfg.Logger == nil:
return nil, trace.BadParameter("missing Logger")
}
+ if cfg.StatusReporter == nil {
+ cfg.StatusReporter = readyz.NoopReporter()
+ }
return &CRLCache{
revocationsClient: cfg.RevocationsClient,
logger: cfg.Logger,
botIdentityReadyCh: cfg.BotIdentityReadyCh,
+ statusReporter: cfg.StatusReporter,
initialized: make(chan struct{}),
}, nil
}
@@ -147,6 +154,7 @@ func (m *CRLCache) Run(ctx context.Context) error {
"error", err,
"backoff", trustBundleInitFailureBackoff,
)
+ m.statusReporter.ReportReason(readyz.Unhealthy, err.Error())
}
select {
case <-ctx.Done():
@@ -168,6 +176,7 @@ func (m *CRLCache) watch(ctx context.Context) error {
return trace.Wrap(err, "opening CRL stream")
}
+ m.statusReporter.Report(readyz.Healthy)
for {
res, err := stream.Recv()
if err != nil {
diff --git a/lib/tbot/workloadidentity/trust_bundle_cache.go b/lib/tbot/workloadidentity/trust_bundle_cache.go
index 707ce302a811d..8c651b4019e1f 100644
--- a/lib/tbot/workloadidentity/trust_bundle_cache.go
+++ b/lib/tbot/workloadidentity/trust_bundle_cache.go
@@ -41,6 +41,7 @@ import (
"github.com/gravitational/teleport/api/utils/keys"
"github.com/gravitational/teleport/lib/jwt"
"github.com/gravitational/teleport/lib/services"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
)
var tracer = otel.Tracer("github.com/gravitational/teleport/lib/spiffe")
@@ -177,7 +178,8 @@ type TrustBundleCache struct {
clusterName string
botIdentityReadyCh <-chan struct{}
- logger *slog.Logger
+ logger *slog.Logger
+ statusReporter readyz.Reporter
mu sync.RWMutex
bundleSet *BundleSet
@@ -199,6 +201,7 @@ type TrustBundleCacheConfig struct {
ClusterName string
Logger *slog.Logger
BotIdentityReadyCh <-chan struct{}
+ StatusReporter readyz.Reporter
}
// NewTrustBundleCache creates a new TrustBundleCache.
@@ -215,6 +218,9 @@ func NewTrustBundleCache(cfg TrustBundleCacheConfig) (*TrustBundleCache, error)
case cfg.Logger == nil:
return nil, trace.BadParameter("missing Logger")
}
+ if cfg.StatusReporter == nil {
+ cfg.StatusReporter = readyz.NoopReporter()
+ }
return &TrustBundleCache{
federationClient: cfg.FederationClient,
trustClient: cfg.TrustClient,
@@ -222,6 +228,7 @@ func NewTrustBundleCache(cfg TrustBundleCacheConfig) (*TrustBundleCache, error)
clusterName: cfg.ClusterName,
logger: cfg.Logger,
botIdentityReadyCh: cfg.BotIdentityReadyCh,
+ statusReporter: cfg.StatusReporter,
initialized: make(chan struct{}),
}, nil
}
@@ -260,6 +267,7 @@ func (m *TrustBundleCache) Run(ctx context.Context) error {
"error", err,
"backoff", trustBundleInitFailureBackoff,
)
+ m.statusReporter.ReportReason(readyz.Unhealthy, err.Error())
}
select {
case <-ctx.Done():
@@ -342,6 +350,8 @@ func (m *TrustBundleCache) watch(ctx context.Context) error {
return trace.Wrap(watcher.Error(), "watcher closed before initialization")
}
+ m.statusReporter.Report(readyz.Healthy)
+
// Now that we know our watcher is streaming events, we can fetch the
// current point-in-time list of resources.
bundleSet, err := FetchInitialBundleSet(