diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
index 8cc8b8dcc6aea..af1b9c93095cd 100644
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -4,6 +4,7 @@ on:
merge_group:
env:
GOFLAGS: '-buildvcs=false'
+ GOEXPERIMENT: 'synctest'
jobs:
changes:
diff --git a/Makefile b/Makefile
index 8856d0daf090f..99984844a912a 100644
--- a/Makefile
+++ b/Makefile
@@ -941,7 +941,7 @@ test-go-prepare: ensure-webassets bpf-bytecode $(TEST_LOG_DIR) ensure-gotestsum
test-go-unit: FLAGS ?= -race -shuffle on
test-go-unit: SUBJECT ?= $(shell go list ./... | grep -vE 'teleport/(e2e|integration|tool/tsh|integrations/operator|integrations/access|integrations/lib)')
test-go-unit:
- $(CGOFLAG) go test -cover -json -tags "$(PAM_TAG) $(FIPS_TAG) $(BPF_TAG) $(LIBFIDO2_TEST_TAG) $(TOUCHID_TAG) $(PIV_TEST_TAG) $(VNETDAEMON_TAG)" $(PACKAGES) $(SUBJECT) $(FLAGS) $(ADDFLAGS) \
+ $(CGOFLAG) GOEXPERIMENT=synctest go test -cover -json -tags "$(PAM_TAG) $(FIPS_TAG) $(BPF_TAG) $(LIBFIDO2_TEST_TAG) $(TOUCHID_TAG) $(PIV_TEST_TAG) $(VNETDAEMON_TAG)" $(PACKAGES) $(SUBJECT) $(FLAGS) $(ADDFLAGS) \
| tee $(TEST_LOG_DIR)/unit.json \
| gotestsum --raw-command -- cat
diff --git a/api/gen/proto/go/teleport/machineid/v1/bot_instance.pb.go b/api/gen/proto/go/teleport/machineid/v1/bot_instance.pb.go
index fe5b9ca1689f7..1cc8a9b9b58be 100644
--- a/api/gen/proto/go/teleport/machineid/v1/bot_instance.pb.go
+++ b/api/gen/proto/go/teleport/machineid/v1/bot_instance.pb.go
@@ -103,6 +103,64 @@ func (BotKind) EnumDescriptor() ([]byte, []int) {
return file_teleport_machineid_v1_bot_instance_proto_rawDescGZIP(), []int{0}
}
+// BotInstanceHealthStatus describes the healthiness of a `tbot` service.
+type BotInstanceHealthStatus int32
+
+const (
+ // The enum zero-value, it means no status was included.
+ BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_UNSPECIFIED BotInstanceHealthStatus = 0
+ // Means the service is still "starting up" and hasn't reported its status.
+ BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_INITIALIZING BotInstanceHealthStatus = 1
+ // Means the service is healthy and ready to serve traffic, or it has
+ // recently succeeded in generating an output.
+ BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_HEALTHY BotInstanceHealthStatus = 2
+ // Means the service is failing to serve traffic or generate output.
+ BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_UNHEALTHY BotInstanceHealthStatus = 3
+)
+
+// Enum value maps for BotInstanceHealthStatus.
+var (
+ BotInstanceHealthStatus_name = map[int32]string{
+ 0: "BOT_INSTANCE_HEALTH_STATUS_UNSPECIFIED",
+ 1: "BOT_INSTANCE_HEALTH_STATUS_INITIALIZING",
+ 2: "BOT_INSTANCE_HEALTH_STATUS_HEALTHY",
+ 3: "BOT_INSTANCE_HEALTH_STATUS_UNHEALTHY",
+ }
+ BotInstanceHealthStatus_value = map[string]int32{
+ "BOT_INSTANCE_HEALTH_STATUS_UNSPECIFIED": 0,
+ "BOT_INSTANCE_HEALTH_STATUS_INITIALIZING": 1,
+ "BOT_INSTANCE_HEALTH_STATUS_HEALTHY": 2,
+ "BOT_INSTANCE_HEALTH_STATUS_UNHEALTHY": 3,
+ }
+)
+
+func (x BotInstanceHealthStatus) Enum() *BotInstanceHealthStatus {
+ p := new(BotInstanceHealthStatus)
+ *p = x
+ return p
+}
+
+func (x BotInstanceHealthStatus) String() string {
+ return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
+}
+
+func (BotInstanceHealthStatus) Descriptor() protoreflect.EnumDescriptor {
+ return file_teleport_machineid_v1_bot_instance_proto_enumTypes[1].Descriptor()
+}
+
+func (BotInstanceHealthStatus) Type() protoreflect.EnumType {
+ return &file_teleport_machineid_v1_bot_instance_proto_enumTypes[1]
+}
+
+func (x BotInstanceHealthStatus) Number() protoreflect.EnumNumber {
+ return protoreflect.EnumNumber(x)
+}
+
+// Deprecated: Use BotInstanceHealthStatus.Descriptor instead.
+func (BotInstanceHealthStatus) EnumDescriptor() ([]byte, []int) {
+ return file_teleport_machineid_v1_bot_instance_proto_rawDescGZIP(), []int{1}
+}
+
// A BotInstance
type BotInstance struct {
state protoimpl.MessageState `protogen:"open.v1"`
@@ -548,8 +606,10 @@ type BotInstanceStatus struct {
InitialHeartbeat *BotInstanceStatusHeartbeat `protobuf:"bytes,3,opt,name=initial_heartbeat,json=initialHeartbeat,proto3" json:"initial_heartbeat,omitempty"`
// The N most recent heartbeats for this bot instance.
LatestHeartbeats []*BotInstanceStatusHeartbeat `protobuf:"bytes,4,rep,name=latest_heartbeats,json=latestHeartbeats,proto3" json:"latest_heartbeats,omitempty"`
- unknownFields protoimpl.UnknownFields
- sizeCache protoimpl.SizeCache
+ // The health of the services/output `tbot` is running.
+ ServiceHealth []*BotInstanceServiceHealth `protobuf:"bytes,5,rep,name=service_health,json=serviceHealth,proto3" json:"service_health,omitempty"`
+ unknownFields protoimpl.UnknownFields
+ sizeCache protoimpl.SizeCache
}
func (x *BotInstanceStatus) Reset() {
@@ -610,6 +670,142 @@ func (x *BotInstanceStatus) GetLatestHeartbeats() []*BotInstanceStatusHeartbeat
return nil
}
+func (x *BotInstanceStatus) GetServiceHealth() []*BotInstanceServiceHealth {
+ if x != nil {
+ return x.ServiceHealth
+ }
+ return nil
+}
+
+// BotInstanceServiceHealth is a snapshot of a `tbot` service's health.
+type BotInstanceServiceHealth struct {
+ state protoimpl.MessageState `protogen:"open.v1"`
+ // Service identifies the service.
+ Service *BotInstanceServiceIdentifier `protobuf:"bytes,1,opt,name=service,proto3" json:"service,omitempty"`
+ // Status describes the service's healthiness.
+ Status BotInstanceHealthStatus `protobuf:"varint,2,opt,name=status,proto3,enum=teleport.machineid.v1.BotInstanceHealthStatus" json:"status,omitempty"`
+ // Reason is a human-readable explanation for the service's status. It might
+ // include an error message.
+ Reason *string `protobuf:"bytes,3,opt,name=reason,proto3,oneof" json:"reason,omitempty"`
+ // UpdatedAt is the time at which the service's health last changed.
+ UpdatedAt *timestamppb.Timestamp `protobuf:"bytes,4,opt,name=updated_at,json=updatedAt,proto3" json:"updated_at,omitempty"`
+ unknownFields protoimpl.UnknownFields
+ sizeCache protoimpl.SizeCache
+}
+
+func (x *BotInstanceServiceHealth) Reset() {
+ *x = BotInstanceServiceHealth{}
+ mi := &file_teleport_machineid_v1_bot_instance_proto_msgTypes[5]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+}
+
+func (x *BotInstanceServiceHealth) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*BotInstanceServiceHealth) ProtoMessage() {}
+
+func (x *BotInstanceServiceHealth) ProtoReflect() protoreflect.Message {
+ mi := &file_teleport_machineid_v1_bot_instance_proto_msgTypes[5]
+ if x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use BotInstanceServiceHealth.ProtoReflect.Descriptor instead.
+func (*BotInstanceServiceHealth) Descriptor() ([]byte, []int) {
+ return file_teleport_machineid_v1_bot_instance_proto_rawDescGZIP(), []int{5}
+}
+
+func (x *BotInstanceServiceHealth) GetService() *BotInstanceServiceIdentifier {
+ if x != nil {
+ return x.Service
+ }
+ return nil
+}
+
+func (x *BotInstanceServiceHealth) GetStatus() BotInstanceHealthStatus {
+ if x != nil {
+ return x.Status
+ }
+ return BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_UNSPECIFIED
+}
+
+func (x *BotInstanceServiceHealth) GetReason() string {
+ if x != nil && x.Reason != nil {
+ return *x.Reason
+ }
+ return ""
+}
+
+func (x *BotInstanceServiceHealth) GetUpdatedAt() *timestamppb.Timestamp {
+ if x != nil {
+ return x.UpdatedAt
+ }
+ return nil
+}
+
+// BotInstanceServiceIdentifier uniquely identifies a `tbot` service.
+type BotInstanceServiceIdentifier struct {
+ state protoimpl.MessageState `protogen:"open.v1"`
+ // Type of service (e.g. database-tunnel, ssh-multiplexer).
+ Type string `protobuf:"bytes,1,opt,name=type,proto3" json:"type,omitempty"`
+ // Name of the service, either given by the user or auto-generated.
+ Name string `protobuf:"bytes,2,opt,name=name,proto3" json:"name,omitempty"`
+ unknownFields protoimpl.UnknownFields
+ sizeCache protoimpl.SizeCache
+}
+
+func (x *BotInstanceServiceIdentifier) Reset() {
+ *x = BotInstanceServiceIdentifier{}
+ mi := &file_teleport_machineid_v1_bot_instance_proto_msgTypes[6]
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ ms.StoreMessageInfo(mi)
+}
+
+func (x *BotInstanceServiceIdentifier) String() string {
+ return protoimpl.X.MessageStringOf(x)
+}
+
+func (*BotInstanceServiceIdentifier) ProtoMessage() {}
+
+func (x *BotInstanceServiceIdentifier) ProtoReflect() protoreflect.Message {
+ mi := &file_teleport_machineid_v1_bot_instance_proto_msgTypes[6]
+ if x != nil {
+ ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+ if ms.LoadMessageInfo() == nil {
+ ms.StoreMessageInfo(mi)
+ }
+ return ms
+ }
+ return mi.MessageOf(x)
+}
+
+// Deprecated: Use BotInstanceServiceIdentifier.ProtoReflect.Descriptor instead.
+func (*BotInstanceServiceIdentifier) Descriptor() ([]byte, []int) {
+ return file_teleport_machineid_v1_bot_instance_proto_rawDescGZIP(), []int{6}
+}
+
+func (x *BotInstanceServiceIdentifier) GetType() string {
+ if x != nil {
+ return x.Type
+ }
+ return ""
+}
+
+func (x *BotInstanceServiceIdentifier) GetName() string {
+ if x != nil {
+ return x.Name
+ }
+ return ""
+}
+
var File_teleport_machineid_v1_bot_instance_proto protoreflect.FileDescriptor
const file_teleport_machineid_v1_bot_instance_proto_rawDesc = "" +
@@ -658,18 +854,34 @@ const file_teleport_machineid_v1_bot_instance_proto_rawDesc = "" +
"\n" +
"public_key\x18\x06 \x01(\fR\tpublicKey\x12F\n" +
"\n" +
- "join_attrs\x18\b \x01(\v2'.teleport.workloadidentity.v1.JoinAttrsR\tjoinAttrsJ\x04\b\a\x10\bR\vfingerprint\"\xb1\x03\n" +
+ "join_attrs\x18\b \x01(\v2'.teleport.workloadidentity.v1.JoinAttrsR\tjoinAttrsJ\x04\b\a\x10\bR\vfingerprint\"\x89\x04\n" +
"\x11BotInstanceStatus\x12m\n" +
"\x16initial_authentication\x18\x01 \x01(\v26.teleport.machineid.v1.BotInstanceStatusAuthenticationR\x15initialAuthentication\x12m\n" +
"\x16latest_authentications\x18\x02 \x03(\v26.teleport.machineid.v1.BotInstanceStatusAuthenticationR\x15latestAuthentications\x12^\n" +
"\x11initial_heartbeat\x18\x03 \x01(\v21.teleport.machineid.v1.BotInstanceStatusHeartbeatR\x10initialHeartbeat\x12^\n" +
- "\x11latest_heartbeats\x18\x04 \x03(\v21.teleport.machineid.v1.BotInstanceStatusHeartbeatR\x10latestHeartbeats*\x8c\x01\n" +
+ "\x11latest_heartbeats\x18\x04 \x03(\v21.teleport.machineid.v1.BotInstanceStatusHeartbeatR\x10latestHeartbeats\x12V\n" +
+ "\x0eservice_health\x18\x05 \x03(\v2/.teleport.machineid.v1.BotInstanceServiceHealthR\rserviceHealth\"\x94\x02\n" +
+ "\x18BotInstanceServiceHealth\x12M\n" +
+ "\aservice\x18\x01 \x01(\v23.teleport.machineid.v1.BotInstanceServiceIdentifierR\aservice\x12F\n" +
+ "\x06status\x18\x02 \x01(\x0e2..teleport.machineid.v1.BotInstanceHealthStatusR\x06status\x12\x1b\n" +
+ "\x06reason\x18\x03 \x01(\tH\x00R\x06reason\x88\x01\x01\x129\n" +
+ "\n" +
+ "updated_at\x18\x04 \x01(\v2\x1a.google.protobuf.TimestampR\tupdatedAtB\t\n" +
+ "\a_reason\"F\n" +
+ "\x1cBotInstanceServiceIdentifier\x12\x12\n" +
+ "\x04type\x18\x01 \x01(\tR\x04type\x12\x12\n" +
+ "\x04name\x18\x02 \x01(\tR\x04name*\x8c\x01\n" +
"\aBotKind\x12\x18\n" +
"\x14BOT_KIND_UNSPECIFIED\x10\x00\x12\x11\n" +
"\rBOT_KIND_TBOT\x10\x01\x12\x1f\n" +
"\x1bBOT_KIND_TERRAFORM_PROVIDER\x10\x02\x12 \n" +
"\x1cBOT_KIND_KUBERNETES_OPERATOR\x10\x03\x12\x11\n" +
- "\rBOT_KIND_TCTL\x10\x04BVZTgithub.com/gravitational/teleport/api/gen/proto/go/teleport/machineid/v1;machineidv1b\x06proto3"
+ "\rBOT_KIND_TCTL\x10\x04*\xc4\x01\n" +
+ "\x17BotInstanceHealthStatus\x12*\n" +
+ "&BOT_INSTANCE_HEALTH_STATUS_UNSPECIFIED\x10\x00\x12+\n" +
+ "'BOT_INSTANCE_HEALTH_STATUS_INITIALIZING\x10\x01\x12&\n" +
+ "\"BOT_INSTANCE_HEALTH_STATUS_HEALTHY\x10\x02\x12(\n" +
+ "$BOT_INSTANCE_HEALTH_STATUS_UNHEALTHY\x10\x03BVZTgithub.com/gravitational/teleport/api/gen/proto/go/teleport/machineid/v1;machineidv1b\x06proto3"
var (
file_teleport_machineid_v1_bot_instance_proto_rawDescOnce sync.Once
@@ -683,42 +895,49 @@ func file_teleport_machineid_v1_bot_instance_proto_rawDescGZIP() []byte {
return file_teleport_machineid_v1_bot_instance_proto_rawDescData
}
-var file_teleport_machineid_v1_bot_instance_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
-var file_teleport_machineid_v1_bot_instance_proto_msgTypes = make([]protoimpl.MessageInfo, 5)
+var file_teleport_machineid_v1_bot_instance_proto_enumTypes = make([]protoimpl.EnumInfo, 2)
+var file_teleport_machineid_v1_bot_instance_proto_msgTypes = make([]protoimpl.MessageInfo, 7)
var file_teleport_machineid_v1_bot_instance_proto_goTypes = []any{
(BotKind)(0), // 0: teleport.machineid.v1.BotKind
- (*BotInstance)(nil), // 1: teleport.machineid.v1.BotInstance
- (*BotInstanceSpec)(nil), // 2: teleport.machineid.v1.BotInstanceSpec
- (*BotInstanceStatusHeartbeat)(nil), // 3: teleport.machineid.v1.BotInstanceStatusHeartbeat
- (*BotInstanceStatusAuthentication)(nil), // 4: teleport.machineid.v1.BotInstanceStatusAuthentication
- (*BotInstanceStatus)(nil), // 5: teleport.machineid.v1.BotInstanceStatus
- (*v1.Metadata)(nil), // 6: teleport.header.v1.Metadata
- (*timestamppb.Timestamp)(nil), // 7: google.protobuf.Timestamp
- (*durationpb.Duration)(nil), // 8: google.protobuf.Duration
- (*types.UpdaterV2Info)(nil), // 9: types.UpdaterV2Info
- (*structpb.Struct)(nil), // 10: google.protobuf.Struct
- (*v11.JoinAttrs)(nil), // 11: teleport.workloadidentity.v1.JoinAttrs
+ (BotInstanceHealthStatus)(0), // 1: teleport.machineid.v1.BotInstanceHealthStatus
+ (*BotInstance)(nil), // 2: teleport.machineid.v1.BotInstance
+ (*BotInstanceSpec)(nil), // 3: teleport.machineid.v1.BotInstanceSpec
+ (*BotInstanceStatusHeartbeat)(nil), // 4: teleport.machineid.v1.BotInstanceStatusHeartbeat
+ (*BotInstanceStatusAuthentication)(nil), // 5: teleport.machineid.v1.BotInstanceStatusAuthentication
+ (*BotInstanceStatus)(nil), // 6: teleport.machineid.v1.BotInstanceStatus
+ (*BotInstanceServiceHealth)(nil), // 7: teleport.machineid.v1.BotInstanceServiceHealth
+ (*BotInstanceServiceIdentifier)(nil), // 8: teleport.machineid.v1.BotInstanceServiceIdentifier
+ (*v1.Metadata)(nil), // 9: teleport.header.v1.Metadata
+ (*timestamppb.Timestamp)(nil), // 10: google.protobuf.Timestamp
+ (*durationpb.Duration)(nil), // 11: google.protobuf.Duration
+ (*types.UpdaterV2Info)(nil), // 12: types.UpdaterV2Info
+ (*structpb.Struct)(nil), // 13: google.protobuf.Struct
+ (*v11.JoinAttrs)(nil), // 14: teleport.workloadidentity.v1.JoinAttrs
}
var file_teleport_machineid_v1_bot_instance_proto_depIdxs = []int32{
- 6, // 0: teleport.machineid.v1.BotInstance.metadata:type_name -> teleport.header.v1.Metadata
- 2, // 1: teleport.machineid.v1.BotInstance.spec:type_name -> teleport.machineid.v1.BotInstanceSpec
- 5, // 2: teleport.machineid.v1.BotInstance.status:type_name -> teleport.machineid.v1.BotInstanceStatus
- 7, // 3: teleport.machineid.v1.BotInstanceStatusHeartbeat.recorded_at:type_name -> google.protobuf.Timestamp
- 8, // 4: teleport.machineid.v1.BotInstanceStatusHeartbeat.uptime:type_name -> google.protobuf.Duration
- 9, // 5: teleport.machineid.v1.BotInstanceStatusHeartbeat.updater_info:type_name -> types.UpdaterV2Info
+ 9, // 0: teleport.machineid.v1.BotInstance.metadata:type_name -> teleport.header.v1.Metadata
+ 3, // 1: teleport.machineid.v1.BotInstance.spec:type_name -> teleport.machineid.v1.BotInstanceSpec
+ 6, // 2: teleport.machineid.v1.BotInstance.status:type_name -> teleport.machineid.v1.BotInstanceStatus
+ 10, // 3: teleport.machineid.v1.BotInstanceStatusHeartbeat.recorded_at:type_name -> google.protobuf.Timestamp
+ 11, // 4: teleport.machineid.v1.BotInstanceStatusHeartbeat.uptime:type_name -> google.protobuf.Duration
+ 12, // 5: teleport.machineid.v1.BotInstanceStatusHeartbeat.updater_info:type_name -> types.UpdaterV2Info
0, // 6: teleport.machineid.v1.BotInstanceStatusHeartbeat.kind:type_name -> teleport.machineid.v1.BotKind
- 7, // 7: teleport.machineid.v1.BotInstanceStatusAuthentication.authenticated_at:type_name -> google.protobuf.Timestamp
- 10, // 8: teleport.machineid.v1.BotInstanceStatusAuthentication.metadata:type_name -> google.protobuf.Struct
- 11, // 9: teleport.machineid.v1.BotInstanceStatusAuthentication.join_attrs:type_name -> teleport.workloadidentity.v1.JoinAttrs
- 4, // 10: teleport.machineid.v1.BotInstanceStatus.initial_authentication:type_name -> teleport.machineid.v1.BotInstanceStatusAuthentication
- 4, // 11: teleport.machineid.v1.BotInstanceStatus.latest_authentications:type_name -> teleport.machineid.v1.BotInstanceStatusAuthentication
- 3, // 12: teleport.machineid.v1.BotInstanceStatus.initial_heartbeat:type_name -> teleport.machineid.v1.BotInstanceStatusHeartbeat
- 3, // 13: teleport.machineid.v1.BotInstanceStatus.latest_heartbeats:type_name -> teleport.machineid.v1.BotInstanceStatusHeartbeat
- 14, // [14:14] is the sub-list for method output_type
- 14, // [14:14] is the sub-list for method input_type
- 14, // [14:14] is the sub-list for extension type_name
- 14, // [14:14] is the sub-list for extension extendee
- 0, // [0:14] is the sub-list for field type_name
+ 10, // 7: teleport.machineid.v1.BotInstanceStatusAuthentication.authenticated_at:type_name -> google.protobuf.Timestamp
+ 13, // 8: teleport.machineid.v1.BotInstanceStatusAuthentication.metadata:type_name -> google.protobuf.Struct
+ 14, // 9: teleport.machineid.v1.BotInstanceStatusAuthentication.join_attrs:type_name -> teleport.workloadidentity.v1.JoinAttrs
+ 5, // 10: teleport.machineid.v1.BotInstanceStatus.initial_authentication:type_name -> teleport.machineid.v1.BotInstanceStatusAuthentication
+ 5, // 11: teleport.machineid.v1.BotInstanceStatus.latest_authentications:type_name -> teleport.machineid.v1.BotInstanceStatusAuthentication
+ 4, // 12: teleport.machineid.v1.BotInstanceStatus.initial_heartbeat:type_name -> teleport.machineid.v1.BotInstanceStatusHeartbeat
+ 4, // 13: teleport.machineid.v1.BotInstanceStatus.latest_heartbeats:type_name -> teleport.machineid.v1.BotInstanceStatusHeartbeat
+ 7, // 14: teleport.machineid.v1.BotInstanceStatus.service_health:type_name -> teleport.machineid.v1.BotInstanceServiceHealth
+ 8, // 15: teleport.machineid.v1.BotInstanceServiceHealth.service:type_name -> teleport.machineid.v1.BotInstanceServiceIdentifier
+ 1, // 16: teleport.machineid.v1.BotInstanceServiceHealth.status:type_name -> teleport.machineid.v1.BotInstanceHealthStatus
+ 10, // 17: teleport.machineid.v1.BotInstanceServiceHealth.updated_at:type_name -> google.protobuf.Timestamp
+ 18, // [18:18] is the sub-list for method output_type
+ 18, // [18:18] is the sub-list for method input_type
+ 18, // [18:18] is the sub-list for extension type_name
+ 18, // [18:18] is the sub-list for extension extendee
+ 0, // [0:18] is the sub-list for field type_name
}
func init() { file_teleport_machineid_v1_bot_instance_proto_init() }
@@ -726,13 +945,14 @@ func file_teleport_machineid_v1_bot_instance_proto_init() {
if File_teleport_machineid_v1_bot_instance_proto != nil {
return
}
+ file_teleport_machineid_v1_bot_instance_proto_msgTypes[5].OneofWrappers = []any{}
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_teleport_machineid_v1_bot_instance_proto_rawDesc), len(file_teleport_machineid_v1_bot_instance_proto_rawDesc)),
- NumEnums: 1,
- NumMessages: 5,
+ NumEnums: 2,
+ NumMessages: 7,
NumExtensions: 0,
NumServices: 0,
},
diff --git a/api/gen/proto/go/teleport/machineid/v1/bot_instance_service.pb.go b/api/gen/proto/go/teleport/machineid/v1/bot_instance_service.pb.go
index 8fa41fe942e4e..45bad52dfd303 100644
--- a/api/gen/proto/go/teleport/machineid/v1/bot_instance_service.pb.go
+++ b/api/gen/proto/go/teleport/machineid/v1/bot_instance_service.pb.go
@@ -295,7 +295,9 @@ func (x *DeleteBotInstanceRequest) GetInstanceId() string {
type SubmitHeartbeatRequest struct {
state protoimpl.MessageState `protogen:"open.v1"`
// The heartbeat data to submit.
- Heartbeat *BotInstanceStatusHeartbeat `protobuf:"bytes,1,opt,name=heartbeat,proto3" json:"heartbeat,omitempty"`
+ Heartbeat *BotInstanceStatusHeartbeat `protobuf:"bytes,1,opt,name=heartbeat,proto3" json:"heartbeat,omitempty"`
+ // The health of the services/output `tbot` is running.
+ ServiceHealth []*BotInstanceServiceHealth `protobuf:"bytes,2,rep,name=service_health,json=serviceHealth,proto3" json:"service_health,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
@@ -337,6 +339,13 @@ func (x *SubmitHeartbeatRequest) GetHeartbeat() *BotInstanceStatusHeartbeat {
return nil
}
+func (x *SubmitHeartbeatRequest) GetServiceHealth() []*BotInstanceServiceHealth {
+ if x != nil {
+ return x.ServiceHealth
+ }
+ return nil
+}
+
// The response for SubmitHeartbeat.
type SubmitHeartbeatResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
@@ -396,9 +405,10 @@ const file_teleport_machineid_v1_bot_instance_service_proto_rawDesc = "" +
"\x18DeleteBotInstanceRequest\x12\x19\n" +
"\bbot_name\x18\x01 \x01(\tR\abotName\x12\x1f\n" +
"\vinstance_id\x18\x02 \x01(\tR\n" +
- "instanceId\"i\n" +
+ "instanceId\"\xc1\x01\n" +
"\x16SubmitHeartbeatRequest\x12O\n" +
- "\theartbeat\x18\x01 \x01(\v21.teleport.machineid.v1.BotInstanceStatusHeartbeatR\theartbeat\"\x19\n" +
+ "\theartbeat\x18\x01 \x01(\v21.teleport.machineid.v1.BotInstanceStatusHeartbeatR\theartbeat\x12V\n" +
+ "\x0eservice_health\x18\x02 \x03(\v2/.teleport.machineid.v1.BotInstanceServiceHealthR\rserviceHealth\"\x19\n" +
"\x17SubmitHeartbeatResponse2\xbd\x03\n" +
"\x12BotInstanceService\x12b\n" +
"\x0eGetBotInstance\x12,.teleport.machineid.v1.GetBotInstanceRequest\x1a\".teleport.machineid.v1.BotInstance\x12s\n" +
@@ -429,25 +439,27 @@ var file_teleport_machineid_v1_bot_instance_service_proto_goTypes = []any{
(*types.SortBy)(nil), // 6: types.SortBy
(*BotInstance)(nil), // 7: teleport.machineid.v1.BotInstance
(*BotInstanceStatusHeartbeat)(nil), // 8: teleport.machineid.v1.BotInstanceStatusHeartbeat
- (*emptypb.Empty)(nil), // 9: google.protobuf.Empty
+ (*BotInstanceServiceHealth)(nil), // 9: teleport.machineid.v1.BotInstanceServiceHealth
+ (*emptypb.Empty)(nil), // 10: google.protobuf.Empty
}
var file_teleport_machineid_v1_bot_instance_service_proto_depIdxs = []int32{
- 6, // 0: teleport.machineid.v1.ListBotInstancesRequest.sort:type_name -> types.SortBy
- 7, // 1: teleport.machineid.v1.ListBotInstancesResponse.bot_instances:type_name -> teleport.machineid.v1.BotInstance
- 8, // 2: teleport.machineid.v1.SubmitHeartbeatRequest.heartbeat:type_name -> teleport.machineid.v1.BotInstanceStatusHeartbeat
- 0, // 3: teleport.machineid.v1.BotInstanceService.GetBotInstance:input_type -> teleport.machineid.v1.GetBotInstanceRequest
- 1, // 4: teleport.machineid.v1.BotInstanceService.ListBotInstances:input_type -> teleport.machineid.v1.ListBotInstancesRequest
- 3, // 5: teleport.machineid.v1.BotInstanceService.DeleteBotInstance:input_type -> teleport.machineid.v1.DeleteBotInstanceRequest
- 4, // 6: teleport.machineid.v1.BotInstanceService.SubmitHeartbeat:input_type -> teleport.machineid.v1.SubmitHeartbeatRequest
- 7, // 7: teleport.machineid.v1.BotInstanceService.GetBotInstance:output_type -> teleport.machineid.v1.BotInstance
- 2, // 8: teleport.machineid.v1.BotInstanceService.ListBotInstances:output_type -> teleport.machineid.v1.ListBotInstancesResponse
- 9, // 9: teleport.machineid.v1.BotInstanceService.DeleteBotInstance:output_type -> google.protobuf.Empty
- 5, // 10: teleport.machineid.v1.BotInstanceService.SubmitHeartbeat:output_type -> teleport.machineid.v1.SubmitHeartbeatResponse
- 7, // [7:11] is the sub-list for method output_type
- 3, // [3:7] is the sub-list for method input_type
- 3, // [3:3] is the sub-list for extension type_name
- 3, // [3:3] is the sub-list for extension extendee
- 0, // [0:3] is the sub-list for field type_name
+ 6, // 0: teleport.machineid.v1.ListBotInstancesRequest.sort:type_name -> types.SortBy
+ 7, // 1: teleport.machineid.v1.ListBotInstancesResponse.bot_instances:type_name -> teleport.machineid.v1.BotInstance
+ 8, // 2: teleport.machineid.v1.SubmitHeartbeatRequest.heartbeat:type_name -> teleport.machineid.v1.BotInstanceStatusHeartbeat
+ 9, // 3: teleport.machineid.v1.SubmitHeartbeatRequest.service_health:type_name -> teleport.machineid.v1.BotInstanceServiceHealth
+ 0, // 4: teleport.machineid.v1.BotInstanceService.GetBotInstance:input_type -> teleport.machineid.v1.GetBotInstanceRequest
+ 1, // 5: teleport.machineid.v1.BotInstanceService.ListBotInstances:input_type -> teleport.machineid.v1.ListBotInstancesRequest
+ 3, // 6: teleport.machineid.v1.BotInstanceService.DeleteBotInstance:input_type -> teleport.machineid.v1.DeleteBotInstanceRequest
+ 4, // 7: teleport.machineid.v1.BotInstanceService.SubmitHeartbeat:input_type -> teleport.machineid.v1.SubmitHeartbeatRequest
+ 7, // 8: teleport.machineid.v1.BotInstanceService.GetBotInstance:output_type -> teleport.machineid.v1.BotInstance
+ 2, // 9: teleport.machineid.v1.BotInstanceService.ListBotInstances:output_type -> teleport.machineid.v1.ListBotInstancesResponse
+ 10, // 10: teleport.machineid.v1.BotInstanceService.DeleteBotInstance:output_type -> google.protobuf.Empty
+ 5, // 11: teleport.machineid.v1.BotInstanceService.SubmitHeartbeat:output_type -> teleport.machineid.v1.SubmitHeartbeatResponse
+ 8, // [8:12] is the sub-list for method output_type
+ 4, // [4:8] is the sub-list for method input_type
+ 4, // [4:4] is the sub-list for extension type_name
+ 4, // [4:4] is the sub-list for extension extendee
+ 0, // [0:4] is the sub-list for field type_name
}
func init() { file_teleport_machineid_v1_bot_instance_service_proto_init() }
diff --git a/api/proto/teleport/machineid/v1/bot_instance.proto b/api/proto/teleport/machineid/v1/bot_instance.proto
index 1010412b1a6a8..d31b9b4947262 100644
--- a/api/proto/teleport/machineid/v1/bot_instance.proto
+++ b/api/proto/teleport/machineid/v1/bot_instance.proto
@@ -167,4 +167,47 @@ message BotInstanceStatus {
BotInstanceStatusHeartbeat initial_heartbeat = 3;
// The N most recent heartbeats for this bot instance.
repeated BotInstanceStatusHeartbeat latest_heartbeats = 4;
+ // The health of the services/output `tbot` is running.
+ repeated BotInstanceServiceHealth service_health = 5;
+}
+
+// BotInstanceServiceHealth is a snapshot of a `tbot` service's health.
+message BotInstanceServiceHealth {
+ // Service identifies the service.
+ BotInstanceServiceIdentifier service = 1;
+
+ // Status describes the service's healthiness.
+ BotInstanceHealthStatus status = 2;
+
+ // Reason is a human-readable explanation for the service's status. It might
+ // include an error message.
+ optional string reason = 3;
+
+ // UpdatedAt is the time at which the service's health last changed.
+ google.protobuf.Timestamp updated_at = 4;
+}
+
+// BotInstanceServiceIdentifier uniquely identifies a `tbot` service.
+message BotInstanceServiceIdentifier {
+ // Type of service (e.g. database-tunnel, ssh-multiplexer).
+ string type = 1;
+
+ // Name of the service, either given by the user or auto-generated.
+ string name = 2;
+}
+
+// BotInstanceHealthStatus describes the healthiness of a `tbot` service.
+enum BotInstanceHealthStatus {
+ // The enum zero-value, it means no status was included.
+ BOT_INSTANCE_HEALTH_STATUS_UNSPECIFIED = 0;
+
+ // Means the service is still "starting up" and hasn't reported its status.
+ BOT_INSTANCE_HEALTH_STATUS_INITIALIZING = 1;
+
+ // Means the service is healthy and ready to serve traffic, or it has
+ // recently succeeded in generating an output.
+ BOT_INSTANCE_HEALTH_STATUS_HEALTHY = 2;
+
+ // Means the service is failing to serve traffic or generate output.
+ BOT_INSTANCE_HEALTH_STATUS_UNHEALTHY = 3;
}
diff --git a/api/proto/teleport/machineid/v1/bot_instance_service.proto b/api/proto/teleport/machineid/v1/bot_instance_service.proto
index f0802486fccd4..9e68a1697d6b7 100644
--- a/api/proto/teleport/machineid/v1/bot_instance_service.proto
+++ b/api/proto/teleport/machineid/v1/bot_instance_service.proto
@@ -71,6 +71,9 @@ message DeleteBotInstanceRequest {
message SubmitHeartbeatRequest {
// The heartbeat data to submit.
BotInstanceStatusHeartbeat heartbeat = 1;
+
+ // The health of the services/output `tbot` is running.
+ repeated BotInstanceServiceHealth service_health = 2;
}
// The response for SubmitHeartbeat.
diff --git a/docs/pages/reference/machine-id/diagnostics-service.mdx b/docs/pages/reference/machine-id/diagnostics-service.mdx
index a7a688dea7937..1b828e19f373e 100644
--- a/docs/pages/reference/machine-id/diagnostics-service.mdx
+++ b/docs/pages/reference/machine-id/diagnostics-service.mdx
@@ -93,8 +93,8 @@ Content-Type: application/json
}
```
-By default, `tbot` generates service names based on their configuration such as
-the output destination. You can override this by providing your own name in the
+By default, `tbot` generates service names based on their type (e.g.
+`application-output-1`). You can override this by providing your own name in the
`tbot` configuration file.
```yaml
diff --git a/lib/auth/machineid/machineidv1/bot_instance_service.go b/lib/auth/machineid/machineidv1/bot_instance_service.go
index 24d59081885e2..6d0f60e98f8b4 100644
--- a/lib/auth/machineid/machineidv1/bot_instance_service.go
+++ b/lib/auth/machineid/machineidv1/bot_instance_service.go
@@ -21,6 +21,8 @@ package machineidv1
import (
"context"
"log/slog"
+ "os"
+ "strconv"
"time"
"github.com/gravitational/trace"
@@ -46,6 +48,12 @@ const (
// ensure the instance remains accessible until shortly after the last
// issued certificate expires.
ExpiryMargin = time.Minute * 5
+
+ // serviceNameLimit is the maximum length in bytes of a bot service name.
+ serviceNameLimit = 64
+
+ // statusReasonLimit is the maximum length in bytes of a service status reason.
+ statusReasonLimit = 256
)
// BotInstancesCache is the subset of the cached resources that the Service queries.
@@ -178,6 +186,17 @@ func (b *BotInstanceService) SubmitHeartbeat(ctx context.Context, req *pb.Submit
return nil, trace.BadParameter("heartbeat: must be non-nil")
}
+ for _, svcHealth := range req.GetServiceHealth() {
+ name := svcHealth.GetService().GetName()
+ if len(name) > serviceNameLimit {
+ return nil, trace.BadParameter("service name %q is longer than %d bytes", name, serviceNameLimit)
+ }
+ reason := svcHealth.GetReason()
+ if len(reason) > statusReasonLimit {
+ return nil, trace.BadParameter("service %q has a status reason longer than %d bytes", name, statusReasonLimit)
+ }
+ }
+
// Enforce that the connecting client is a bot and has a bot instance ID.
botName := authCtx.Identity.GetIdentity().BotName
botInstanceID := authCtx.Identity.GetIdentity().BotInstanceID
@@ -212,6 +231,11 @@ func (b *BotInstanceService) SubmitHeartbeat(ctx context.Context, req *pb.Submit
// Append the new heartbeat to the end.
instance.Status.LatestHeartbeats = append(instance.Status.LatestHeartbeats, req.Heartbeat)
+ if storeHeartbeatExtras() {
+ // Overwrite the service health.
+ instance.Status.ServiceHealth = req.ServiceHealth
+ }
+
return instance, nil
})
if err != nil {
@@ -220,3 +244,15 @@ func (b *BotInstanceService) SubmitHeartbeat(ctx context.Context, req *pb.Submit
return &pb.SubmitHeartbeatResponse{}, nil
}
+
+// storeHeartbeatExtras returns whether we should store "extra" data submitted
+// with tbot heartbeats, such as the service health. Defaults to true unless the
+// TELEPORT_DISABLE_TBOT_HEARTBEAT_EXTRAS environment variable is set to true on
+// the auth server.
+func storeHeartbeatExtras() bool {
+ disabled, err := strconv.ParseBool(os.Getenv("TELEPORT_DISABLE_TBOT_HEARTBEAT_EXTRAS"))
+ if err != nil {
+ return true
+ }
+ return !disabled
+}
diff --git a/lib/auth/machineid/machineidv1/bot_instance_service_test.go b/lib/auth/machineid/machineidv1/bot_instance_service_test.go
index 5de41a878a7e4..9fa7a2dc6f86f 100644
--- a/lib/auth/machineid/machineidv1/bot_instance_service_test.go
+++ b/lib/auth/machineid/machineidv1/bot_instance_service_test.go
@@ -23,6 +23,7 @@ import (
"fmt"
"slices"
"strconv"
+ "strings"
"testing"
"github.com/google/go-cmp/cmp"
@@ -257,6 +258,7 @@ func TestBotInstanceServiceSubmitHeartbeat(t *testing.T) {
createBotInstance bool
assertErr assert.ErrorAssertionFunc
wantHeartbeat bool
+ wantServiceHealth []*machineidv1.BotInstanceServiceHealth
}{
{
name: "success",
@@ -265,10 +267,30 @@ func TestBotInstanceServiceSubmitHeartbeat(t *testing.T) {
Heartbeat: &machineidv1.BotInstanceStatusHeartbeat{
Hostname: "llama",
},
+ ServiceHealth: []*machineidv1.BotInstanceServiceHealth{
+ {
+ Service: &machineidv1.BotInstanceServiceIdentifier{
+ Type: "application-tunnel",
+ Name: "my-application-tunnel",
+ },
+ Status: machineidv1.BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_UNHEALTHY,
+ Reason: ptr("application is broken"),
+ },
+ },
},
identity: goodIdentity,
assertErr: assert.NoError,
wantHeartbeat: true,
+ wantServiceHealth: []*machineidv1.BotInstanceServiceHealth{
+ {
+ Service: &machineidv1.BotInstanceServiceIdentifier{
+ Type: "application-tunnel",
+ Name: "my-application-tunnel",
+ },
+ Status: machineidv1.BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_UNHEALTHY,
+ Reason: ptr("application is broken"),
+ },
+ },
},
{
name: "missing bot name",
@@ -327,6 +349,54 @@ func TestBotInstanceServiceSubmitHeartbeat(t *testing.T) {
},
wantHeartbeat: false,
},
+ {
+ name: "service name too long",
+ createBotInstance: true,
+ req: &machineidv1.SubmitHeartbeatRequest{
+ Heartbeat: &machineidv1.BotInstanceStatusHeartbeat{
+ Hostname: "llama",
+ },
+ ServiceHealth: []*machineidv1.BotInstanceServiceHealth{
+ {
+ Service: &machineidv1.BotInstanceServiceIdentifier{
+ Type: "application-tunnel",
+ Name: strings.Repeat("a", 100),
+ },
+ Status: machineidv1.BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_UNHEALTHY,
+ Reason: ptr("application is broken"),
+ },
+ },
+ },
+ identity: goodIdentity,
+ assertErr: func(t assert.TestingT, err error, i ...any) bool {
+ return assert.True(t, trace.IsBadParameter(err)) && assert.Contains(t, err.Error(), "is longer than 64 bytes")
+ },
+ wantHeartbeat: false,
+ },
+ {
+ name: "status reason too long",
+ createBotInstance: true,
+ req: &machineidv1.SubmitHeartbeatRequest{
+ Heartbeat: &machineidv1.BotInstanceStatusHeartbeat{
+ Hostname: "llama",
+ },
+ ServiceHealth: []*machineidv1.BotInstanceServiceHealth{
+ {
+ Service: &machineidv1.BotInstanceServiceIdentifier{
+ Type: "application-tunnel",
+ Name: "my-application-tunnel",
+ },
+ Status: machineidv1.BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_UNHEALTHY,
+ Reason: ptr(strings.Repeat("a", 300)),
+ },
+ },
+ },
+ identity: goodIdentity,
+ assertErr: func(t assert.TestingT, err error, i ...any) bool {
+ return assert.True(t, trace.IsBadParameter(err)) && assert.Contains(t, err.Error(), "status reason longer than 256 bytes")
+ },
+ wantHeartbeat: false,
+ },
}
for _, tt := range tests {
@@ -377,6 +447,13 @@ func TestBotInstanceServiceSubmitHeartbeat(t *testing.T) {
assert.Nil(t, bi.Status.InitialHeartbeat)
assert.Empty(t, bi.Status.LatestHeartbeats)
}
+ assert.Empty(t,
+ cmp.Diff(
+ bi.Status.ServiceHealth,
+ tt.wantServiceHealth,
+ protocmp.Transform(),
+ ),
+ )
}
})
}
@@ -591,3 +668,5 @@ func newBotInstanceService(
return service
}
+
+func ptr[T any](v T) *T { return &v }
diff --git a/lib/tbot/bot/bot.go b/lib/tbot/bot/bot.go
index b48b7f1731ff0..989019d679469 100644
--- a/lib/tbot/bot/bot.go
+++ b/lib/tbot/bot/bot.go
@@ -72,26 +72,38 @@ func (b *Bot) Run(ctx context.Context) (err error) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
- services, closer, err := b.buildServices(ctx)
+ registry := readyz.NewRegistry()
+
+ services, closer, err := b.buildServices(ctx, registry)
defer closer()
if err != nil {
return trace.Wrap(err)
}
+ for _, handle := range services {
+ // If the service builder called ServiceDependencies.GetStatusReporter,
+ // we take that as a promise that the service's Run method will report
+ // statuses. Otherwise we will not include the service in heartbeats or
+ // the `/readyz` endpoint.
+ if handle.statusReporter.used {
+ handle.statusReporter.reporter = registry.AddService(handle.serviceType, handle.name)
+ }
+ }
+
b.cfg.Logger.InfoContext(ctx, "Initialization complete. Starting services")
group, groupCtx := errgroup.WithContext(ctx)
- for _, svc := range services {
- svc := svc
- log := b.cfg.Logger.With("service", svc.String())
+ for _, handle := range services {
+ handle := handle
+ log := b.cfg.Logger.With("service", handle.name)
group.Go(func() error {
log.InfoContext(groupCtx, "Starting service")
- err := svc.Run(groupCtx)
+ err := handle.service.Run(groupCtx)
if err != nil {
log.ErrorContext(ctx, "Service exited with error", "error", err)
- return trace.Wrap(err, "service(%s)", svc.String())
+ return trace.Wrap(err, "service(%s)", handle.name)
}
log.InfoContext(groupCtx, "Service exited")
@@ -113,32 +125,48 @@ func (b *Bot) OneShot(ctx context.Context) (err error) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
- services, closer, err := b.buildServices(ctx)
+ registry := readyz.NewRegistry()
+
+ services, closer, err := b.buildServices(ctx, registry)
defer closer()
if err != nil {
return trace.Wrap(err)
}
- group, groupCtx := errgroup.WithContext(ctx)
- for _, service := range services {
- log := b.cfg.Logger.With("service", service.String())
-
- svc, ok := service.(OneShotService)
- if !ok {
- log.InfoContext(ctx, "Service does not support oneshot mode, will not run")
+ // Filter out the services that don't support oneshot mode.
+ oneShotServices := make([]*serviceHandle, 0, len(services))
+ for _, handle := range services {
+ handle := handle
+ if _, ok := handle.service.(OneShotService); !ok {
+ b.cfg.Logger.InfoContext(ctx,
+ "Service does not support oneshot mode, will not run",
+ "service", handle.name,
+ )
continue
}
+ // Add oneshot services to the registry.
+ handle.statusReporter.reporter = registry.AddService(handle.serviceType, handle.name)
+ oneShotServices = append(oneShotServices, handle)
+ }
+
+ group, groupCtx := errgroup.WithContext(ctx)
+ for _, handle := range oneShotServices {
+ handle := handle
+ log := b.cfg.Logger.With("service", handle.name)
+
group.Go(func() error {
log.DebugContext(groupCtx, "Running service in oneshot mode")
- err := svc.OneShot(groupCtx)
+ err := handle.service.(OneShotService).OneShot(groupCtx)
if err != nil {
log.ErrorContext(ctx, "Service exited with error", "error", err)
- return trace.Wrap(err, "service(%s)", svc.String())
+ handle.statusReporter.ReportReason(readyz.Unhealthy, err.Error())
+ return trace.Wrap(err, "service(%s)", handle.name)
}
log.InfoContext(groupCtx, "Service finished")
+ handle.statusReporter.Report(readyz.Healthy)
return nil
})
}
@@ -152,19 +180,19 @@ func (b *Bot) checkStarted() error {
return trace.BadParameter("bot has already been started")
}
-func (b *Bot) buildServices(ctx context.Context) ([]Service, func(), error) {
+func (b *Bot) buildServices(ctx context.Context, registry *readyz.Registry) ([]*serviceHandle, func(), error) {
startedAt := time.Now().UTC()
- services := make([]Service, 0, len(b.cfg.Services))
- var closers []func()
+ handles := make([]*serviceHandle, 0, len(b.cfg.Services))
closeFn := func() {
- for _, fn := range closers {
- fn()
+ for _, h := range handles {
+ if h.closeFn != nil {
+ h.closeFn()
+ }
}
}
// Build internal services and dependencies.
- statusRegistry := readyz.NewRegistry()
reloadBroadcaster := b.buildReloadBroadcaster(ctx)
resolver, err := b.buildResolver(ctx)
@@ -177,37 +205,34 @@ func (b *Bot) buildServices(ctx context.Context) ([]Service, func(), error) {
return nil, closeFn, trace.Wrap(err, "building client builder")
}
- identityService, closeIdentityService, err := b.buildIdentityService(
+ identityService, identityServiceHandle, err := b.buildIdentityService(
ctx,
reloadBroadcaster,
clientBuilder,
- statusRegistry,
)
if err != nil {
return nil, closeFn, trace.Wrap(err, "building identity service")
}
- services = append(services, identityService)
- closers = append(closers, closeIdentityService)
+ handles = append(handles, identityServiceHandle)
heartbeatService, err := b.buildHeartbeatService(
identityService,
startedAt,
- statusRegistry,
+ registry,
)
if err != nil {
return nil, closeFn, trace.Wrap(err, "building heartbeat service")
}
- services = append(services, heartbeatService)
+ handles = append(handles, heartbeatService)
caRotationService, err := b.buildCARotationService(
reloadBroadcaster,
identityService,
- statusRegistry,
)
if err != nil {
return nil, closeFn, trace.Wrap(err, "building CA rotation service")
}
- services = append(services, caRotationService)
+ handles = append(handles, caRotationService)
proxyPinger, err := b.buildProxyPinger(identityService)
if err != nil {
@@ -220,28 +245,42 @@ func (b *Bot) buildServices(ctx context.Context) ([]Service, func(), error) {
}
// Build user services.
- for idx, buildService := range b.cfg.Services {
+ for idx, builder := range b.cfg.Services {
reloadCh, unsubscribe := reloadBroadcaster.Subscribe()
- closers = append(closers, unsubscribe)
- service, err := buildService(ServiceDependencies{
+ handle := &serviceHandle{
+ closeFn: unsubscribe,
+ statusReporter: &statusReporter{},
+ }
+ handle.serviceType, handle.name = builder.GetTypeAndName()
+
+ var err error
+ handle.service, err = builder.Build(ServiceDependencies{
Client: identityService.GetClient(),
Resolver: resolver,
- Logger: b.cfg.Logger,
ClientBuilder: clientBuilder,
IdentityGenerator: identityGenerator,
ProxyPinger: proxyPinger,
BotIdentity: identityService.GetIdentity,
BotIdentityReadyCh: identityService.Ready(),
ReloadCh: reloadCh,
- StatusRegistry: statusRegistry,
+ StatusRegistry: registry,
+ GetStatusReporter: func() readyz.Reporter {
+ handle.statusReporter.used = true
+ return handle.statusReporter
+ },
+ Logger: b.cfg.Logger.With(
+ teleport.ComponentKey,
+ teleport.Component(teleport.ComponentTBot, "svc", handle.name),
+ ),
})
if err != nil {
return nil, closeFn, trace.Wrap(err, "building service [%d]", idx)
}
- services = append(services, service)
+
+ handles = append(handles, handle)
}
- return services, closeFn, nil
+ return handles, closeFn, nil
}
func (b *Bot) buildReloadBroadcaster(ctx context.Context) *internal.ChannelBroadcaster {
@@ -302,10 +341,14 @@ func (b *Bot) buildIdentityService(
ctx context.Context,
reloadBroadcaster *internal.ChannelBroadcaster,
clientBuilder *client.Builder,
- statusRegistry *readyz.Registry,
-) (*identity.Service, func(), error) {
- reloadCh, unsubscribe := reloadBroadcaster.Subscribe()
+) (*identity.Service, *serviceHandle, error) {
+ handle := &serviceHandle{
+ serviceType: "internal/identity",
+ name: "identity",
+ statusReporter: &statusReporter{used: true},
+ }
+ reloadCh, unsubscribe := reloadBroadcaster.Subscribe()
identityService, err := identity.NewService(identity.Config{
Connection: b.cfg.Connection,
Onboarding: b.cfg.Onboarding,
@@ -315,18 +358,19 @@ func (b *Bot) buildIdentityService(
FIPS: b.cfg.FIPS,
Logger: b.cfg.Logger.With(
teleport.ComponentKey,
- teleport.Component(teleport.ComponentTBot, "identity"),
+ teleport.Component(teleport.ComponentTBot, handle.name),
),
ClientBuilder: clientBuilder,
ReloadCh: reloadCh,
- StatusReporter: statusRegistry.AddService("identity"),
+ StatusReporter: handle.statusReporter,
})
if err != nil {
unsubscribe()
return nil, nil, trace.Wrap(err, "building identity service")
}
- close := func() {
+ handle.service = identityService
+ handle.closeFn = func() {
if err := identityService.Close(); err != nil {
b.cfg.Logger.ErrorContext(
ctx,
@@ -338,20 +382,25 @@ func (b *Bot) buildIdentityService(
}
if err := identityService.Initialize(ctx); err != nil {
- close()
+ handle.closeFn()
return nil, nil, trace.Wrap(err, "initializing identity service")
}
-
- return identityService, close, nil
+ return identityService, handle, nil
}
func (b *Bot) buildHeartbeatService(
identityService *identity.Service,
startedAt time.Time,
statusRegistry *readyz.Registry,
-) (*heartbeat.Service, error) {
- return heartbeat.NewService(heartbeat.Config{
- BotKind: machineidv1.BotKind(b.cfg.Kind),
+) (*serviceHandle, error) {
+ handle := &serviceHandle{
+ serviceType: "internal/heartbeat",
+ name: "heartbeat",
+ statusReporter: &statusReporter{used: true},
+ }
+
+ var err error
+ handle.service, err = heartbeat.NewService(heartbeat.Config{
Interval: 30 * time.Minute,
RetryLimit: 5,
Client: machineidv1.NewBotInstanceServiceClient(identityService.GetClient().GetConnection()),
@@ -359,10 +408,16 @@ func (b *Bot) buildHeartbeatService(
StartedAt: startedAt,
JoinMethod: b.cfg.Onboarding.JoinMethod,
Logger: b.cfg.Logger.With(
- teleport.ComponentKey, teleport.Component(teleport.ComponentTBot, "heartbeat"),
+ teleport.ComponentKey,
+ teleport.Component(teleport.ComponentTBot, handle.name),
),
- StatusReporter: statusRegistry.AddService("heartbeat"),
+ StatusReporter: handle.statusReporter,
+ StatusRegistry: statusRegistry,
})
+ if err != nil {
+ return nil, trace.Wrap(err)
+ }
+ return handle, nil
}
func (b *Bot) buildProxyPinger(identityService *identity.Service) (connection.ProxyPinger, error) {
@@ -379,17 +434,65 @@ func (b *Bot) buildProxyPinger(identityService *identity.Service) (connection.Pr
func (b *Bot) buildCARotationService(
reloadBroadcaster *internal.ChannelBroadcaster,
identityService *identity.Service,
- statusRegistry *readyz.Registry,
-) (*carotation.Service, error) {
- return carotation.NewService(carotation.Config{
+) (*serviceHandle, error) {
+ handle := &serviceHandle{
+ serviceType: "internal/ca-rotation",
+ name: "ca-rotation",
+ statusReporter: &statusReporter{used: true},
+ }
+
+ var err error
+ handle.service, err = carotation.NewService(carotation.Config{
BroadcastFn: reloadBroadcaster.Broadcast,
Client: identityService.GetClient(),
GetBotIdentityFn: identityService.GetIdentity,
BotIdentityReadyCh: identityService.Ready(),
Logger: b.cfg.Logger.With(
teleport.ComponentKey,
- teleport.Component(teleport.ComponentTBot, "ca-rotation"),
+ teleport.Component(teleport.ComponentTBot, handle.name),
),
- StatusReporter: statusRegistry.AddService("ca-rotation"),
+ StatusReporter: handle.statusReporter,
})
+ if err != nil {
+ return nil, trace.Wrap(err)
+ }
+ return handle, nil
+}
+
+// serviceHandle contains a built service, its type/name, close function, and a
+// pointer to its status reporter - so we can automatically report statuses for
+// oneshot services.
+type serviceHandle struct {
+ serviceType, name string
+ service Service
+ statusReporter *statusReporter
+ closeFn func()
+}
+
+// statusReporter wraps readyz.Reporter to break a circular dependency where:
+//
+// - We need a status reporter to build a service
+// - We must register the service in order to get the status reporter
+// - We may not want to register the service in one-shot mode if it does not
+// implement OneShotService
+// - We can only know whether the service implements OneShotService after
+// we've built it
+//
+// This wrapper allows us to defer the actual registration until we know whether
+// the service implements OneShotService.
+type statusReporter struct {
+ used bool
+ reporter readyz.Reporter
+}
+
+func (r *statusReporter) Report(status readyz.Status) {
+ if r.reporter != nil {
+ r.reporter.Report(status)
+ }
+}
+
+func (r *statusReporter) ReportReason(status readyz.Status, reason string) {
+ if r.reporter != nil {
+ r.reporter.ReportReason(status, reason)
+ }
}
diff --git a/lib/tbot/bot/service.go b/lib/tbot/bot/service.go
index a5ba3c9deb121..4b216117aff7d 100644
--- a/lib/tbot/bot/service.go
+++ b/lib/tbot/bot/service.go
@@ -20,17 +20,18 @@ package bot
import (
"context"
+ "fmt"
"log/slog"
"golang.org/x/sync/errgroup"
- "github.com/gravitational/teleport"
apiclient "github.com/gravitational/teleport/api/client"
"github.com/gravitational/teleport/lib/reversetunnelclient"
"github.com/gravitational/teleport/lib/tbot/bot/connection"
"github.com/gravitational/teleport/lib/tbot/client"
"github.com/gravitational/teleport/lib/tbot/identity"
"github.com/gravitational/teleport/lib/tbot/readyz"
+ "github.com/gravitational/teleport/lib/utils"
)
// Service is a long-running sub-component of tbot.
@@ -65,8 +66,8 @@ type ServiceDependencies struct {
// Resolver that can be used to look up proxy addresses.
Resolver reversetunnelclient.Resolver
- // Logger to which errors and messages can be written. It's the service's
- // responsibility to tag the logger with a component.
+ // Logger to which errors and messages can be written, with its component
+ // set to the name of the service.
Logger *slog.Logger
// ProxyPinger can be used to ping the proxy or auth server to discover
@@ -92,21 +93,62 @@ type ServiceDependencies struct {
// it's time to reload their certificates (e.g. following a CA rotation).
ReloadCh <-chan struct{}
- // StatusRegistry is the registry the service can register itself with to
- // report service health.
- StatusRegistry *readyz.Registry
+ // GetStatusReporter returns the reporter to which the service should report
+ // its health.
+ //
+ // If a ServiceBuilder calls GetStatusReporter the service's Run method *MUST*
+ // call Report or ReportReason (or if using internal.RunOnInterval pass it the
+ // reporter) otherwise it will delay the initial heartbeat and the `/readyz`
+ // endpoint will return 503.
+ //
+ // You do not have to do this in your service's OneShot method as the bot
+ // will automatically report oneshot service status based on its return value.
+ GetStatusReporter func() readyz.Reporter
+
+ // StatusRegistry can be used to read the health of the bot's services.
+ StatusRegistry readyz.ReadOnlyRegistry
}
-// LoggerForService returns a logger with the service's name as its component.
-func (deps ServiceDependencies) LoggerForService(svc Service) *slog.Logger {
- return deps.Logger.With(
- teleport.ComponentKey,
- teleport.Component(teleport.ComponentTBot, "svc", svc.String()),
- )
+// ServiceBuilder will be used by the bot to create a service.
+type ServiceBuilder interface {
+ // GetTypeAndName returns the service type and name.
+ GetTypeAndName() (string, string)
+
+ // Build the service using the given dependencies.
+ Build(ServiceDependencies) (Service, error)
+}
+
+// NewServiceBuilder creates a ServiceBuilder with the given service type, name
+// and build function.
+func NewServiceBuilder(
+ serviceType, name string,
+ buildFn func(ServiceDependencies) (Service, error),
+) ServiceBuilder {
+ if name == "" {
+ // The tbot binary will set default service names, so name could only
+ // realistically be empty if the bot were embedded somewhere else (e.g.
+ // the Terraform provider) in which case a randomly generated name is
+ // better than nothing.
+ //
+ // We do not handle the error from CryptoRandHex because the underlying
+ // call to rand.Read will never fail.
+ suffix, _ := utils.CryptoRandomHex(4)
+ name = fmt.Sprintf("%s-%s", serviceType, suffix)
+ }
+ return &serviceBuilder{
+ serviceType: serviceType,
+ name: name,
+ buildFn: buildFn,
+ }
}
-// ServiceBuilder will be called by the bot to create a service.
-type ServiceBuilder func(ServiceDependencies) (Service, error)
+type serviceBuilder struct {
+ serviceType, name string
+ buildFn func(ServiceDependencies) (Service, error)
+}
+
+func (b *serviceBuilder) GetTypeAndName() (string, string) { return b.serviceType, b.name }
+func (b *serviceBuilder) Build(deps ServiceDependencies) (Service, error) { return b.buildFn(deps) }
// ServicePair combines two related Services.
type ServicePair struct{ primary, secondary Service }
@@ -166,13 +208,6 @@ func (s *OneShotServicePair) OneShot(ctx context.Context) error {
return group.Wait()
}
-// LiteralService create a ServiceBuilder that returns the service as-is.
-func LiteralService(service Service) ServiceBuilder {
- return func(ServiceDependencies) (Service, error) {
- return service, nil
- }
-}
-
// NewNopService returns a service with the given name that does nothing at all.
func NewNopService(name string) NopService {
return NopService{name: name}
diff --git a/lib/tbot/config/config.go b/lib/tbot/config/config.go
index 89873a87fb810..5da45f85f9f06 100644
--- a/lib/tbot/config/config.go
+++ b/lib/tbot/config/config.go
@@ -27,7 +27,6 @@ import (
"io"
"net/url"
"os"
- "regexp"
"slices"
"strings"
"time"
@@ -60,38 +59,6 @@ const (
DefaultRenewInterval = 20 * time.Minute
)
-// ReservedServiceNames are the service names reserved for internal use.
-var ReservedServiceNames = []string{
- "ca-rotation",
- "crl-cache",
- "heartbeat",
- "identity",
- "spiffe-trust-bundle-cache",
-}
-
-var reservedServiceNamesMap = func() map[string]struct{} {
- m := make(map[string]struct{}, len(ReservedServiceNames))
- for _, k := range ReservedServiceNames {
- m[k] = struct{}{}
- }
- return m
-}()
-
-var serviceNameRegex = regexp.MustCompile(`\A[a-z\d_\-+]+\z`)
-
-func validateServiceName(name string) error {
- if name == "" {
- return nil
- }
- if _, ok := reservedServiceNamesMap[name]; ok {
- return trace.BadParameter("service name %q is reserved for internal use", name)
- }
- if !serviceNameRegex.MatchString(name) {
- return trace.BadParameter("invalid service name: %q, may only contain lowercase letters, numbers, hyphens, underscores, or plus symbols", name)
- }
- return nil
-}
-
var log = logutils.NewPackageLogger(teleport.ComponentKey, teleport.ComponentTBot)
// BotConfig is the bot's root config object.
@@ -137,6 +104,10 @@ type BotConfig struct {
// renewal.
ReloadCh <-chan struct{} `yaml:"-"`
+ // Testing is set in unit tests to attach a faux service which exposes the
+ // bot's underlying identity and client so we can make assertions on it.
+ Testing bool `yaml:"-"`
+
// Insecure configures the bot to trust the certificates from the Auth Server or Proxy on first connect without verification.
// Do not use in production.
Insecure bool `yaml:"insecure,omitempty"`
@@ -219,9 +190,11 @@ func (conf *BotConfig) CheckAndSetDefaults() error {
return trace.Wrap(err)
}
- // We've migrated Outputs to Services, so copy all Outputs to Services.
+ // We've migrated Outputs to Services, so move all Outputs to Services.
conf.Services = append(conf.Services, conf.Outputs...)
- uniqueNames := make(map[string]struct{}, len(conf.Services))
+ conf.Outputs = nil
+
+ namer := newServiceNamer()
for i, service := range conf.Services {
if err := service.CheckAndSetDefaults(); err != nil {
return trace.Wrap(err, "validating service[%d]", i)
@@ -229,14 +202,13 @@ func (conf *BotConfig) CheckAndSetDefaults() error {
if err := service.GetCredentialLifetime().Validate(conf.Oneshot); err != nil {
return trace.Wrap(err, "validating service[%d]", i)
}
- if name := service.GetName(); name != "" {
- if err := validateServiceName(name); err != nil {
- return trace.Wrap(err, "validating service[%d]", i)
- }
- if _, seen := uniqueNames[name]; seen {
- return trace.BadParameter("validating service[%d]: duplicate name: %q", i, name)
- }
- uniqueNames[name] = struct{}{}
+
+ name, err := namer.pickName(service.Type(), service.GetName())
+ switch {
+ case err != nil:
+ return trace.Wrap(err, "validating service[%d]", i)
+ case name != service.GetName():
+ service.SetName(name)
}
}
@@ -332,9 +304,14 @@ type ServiceConfig interface {
// support these options should return the zero value.
GetCredentialLifetime() bot.CredentialLifetime
- // GetName returns the user-given name of the service, used for validation
- // purposes.
+ // GetName returns the service's given name. Initially the name chosen by
+ // the user, but after BotConfig.CheckAndSetDefaults is called it may be
+ // our automatically generated name.
GetName() string
+
+ // SetName is called by BotConfig.CheckAndSetDefaults to assign the service
+ // a new name if the user didn't specify one.
+ SetName(string)
}
// ServiceConfigs assists polymorphic unmarshaling of a slice of ServiceConfigs.
diff --git a/lib/tbot/config/config_test.go b/lib/tbot/config/config_test.go
index efec900a31cc5..54cc82899177b 100644
--- a/lib/tbot/config/config_test.go
+++ b/lib/tbot/config/config_test.go
@@ -553,62 +553,4 @@ func TestBotConfig_Base64(t *testing.T) {
}
}
-func TestBotConfig_NameValidation(t *testing.T) {
- t.Parallel()
-
- testCases := map[string]struct {
- cfg *BotConfig
- err string
- }{
- "duplicate names": {
- cfg: &BotConfig{
- Version: V2,
- Services: ServiceConfigs{
- &identity.OutputConfig{
- Name: "foo",
- Destination: &destination.Memory{},
- },
- &identity.OutputConfig{
- Name: "foo",
- Destination: &destination.Memory{},
- },
- },
- },
- err: `duplicate name: "foo`,
- },
- "reserved name": {
- cfg: &BotConfig{
- Version: V2,
- Services: ServiceConfigs{
- &identity.OutputConfig{
- Name: "identity",
- Destination: &destination.Memory{},
- },
- },
- },
- err: `service name "identity" is reserved for internal use`,
- },
- "invalid name": {
- cfg: &BotConfig{
- Version: V2,
- Services: ServiceConfigs{
- &identity.OutputConfig{
- Name: "hello, world!",
- Destination: &destination.Memory{},
- },
- },
- },
- err: `may only contain lowercase letters`,
- },
- }
- for desc, tc := range testCases {
- t.Run(desc, func(t *testing.T) {
- t.Parallel()
- require.ErrorContains(t, tc.cfg.CheckAndSetDefaults(), tc.err)
- })
- }
-}
-
-func ptr[T any](v T) *T {
- return &v
-}
+func ptr[T any](v T) *T { return &v }
diff --git a/lib/tbot/config/naming.go b/lib/tbot/config/naming.go
new file mode 100644
index 0000000000000..845b35728a36d
--- /dev/null
+++ b/lib/tbot/config/naming.go
@@ -0,0 +1,80 @@
+/*
+ * Teleport
+ * Copyright (C) 2025 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+package config
+
+import (
+ "fmt"
+ "regexp"
+
+ "github.com/gravitational/trace"
+)
+
+// reservedServiceNames are the service names reserved for internal use.
+var reservedServiceNames = map[string]struct{}{
+ "ca-rotation": {},
+ "crl-cache": {},
+ "heartbeat": {},
+ "identity": {},
+ "spiffe-trust-bundle-cache": {},
+}
+
+var invalidServiceNameRegex = regexp.MustCompile(`[^a-z\d_\-+]`)
+
+type serviceNamer struct {
+ usedNames map[string]struct{}
+ countByServiceType map[string]int
+}
+
+func newServiceNamer() *serviceNamer {
+ return &serviceNamer{
+ usedNames: make(map[string]struct{}),
+ countByServiceType: make(map[string]int),
+ }
+}
+
+// pickName checks the user-chosen name is valid (e.g. not reserved for internal
+// use or containing illegal characters) if one is given. If no name is given,
+// it will generate a name based on the service type with a counter suffix.
+func (n *serviceNamer) pickName(serviceType, name string) (string, error) {
+ n.countByServiceType[serviceType]++
+
+ if name == "" {
+ name = fmt.Sprintf(
+ "%s-%d",
+ invalidServiceNameRegex.ReplaceAllString(serviceType, "-"),
+ n.countByServiceType[serviceType],
+ )
+ if _, ok := n.usedNames[name]; ok {
+ return "", trace.BadParameter("service name %q conflicts with an automatically generated service name", name)
+ }
+ } else {
+ if _, ok := n.usedNames[name]; ok {
+ return "", trace.BadParameter("service name %q used more than once", name)
+ }
+ if _, ok := reservedServiceNames[name]; ok {
+ return "", trace.BadParameter("service name %q is reserved for internal use", name)
+ }
+ if invalidServiceNameRegex.MatchString(name) {
+ return "", trace.BadParameter("invalid service name: %q, may only contain lowercase letters, numbers, hyphens, underscores, or plus symbols", name)
+ }
+ }
+
+ n.usedNames[name] = struct{}{}
+ return name, nil
+}
diff --git a/lib/tbot/config/naming_test.go b/lib/tbot/config/naming_test.go
new file mode 100644
index 0000000000000..b2108a936b52f
--- /dev/null
+++ b/lib/tbot/config/naming_test.go
@@ -0,0 +1,83 @@
+/*
+ * Teleport
+ * Copyright (C) 2025 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+package config
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestServiceNamer(t *testing.T) {
+ t.Parallel()
+
+ t.Run("success", func(t *testing.T) {
+ namer := newServiceNamer()
+
+ // Chosen name is used.
+ name, err := namer.pickName("type/1", "foo")
+ require.NoError(t, err)
+ assert.Equal(t, "foo", name)
+
+ // Next name in the "sequence" for that service type is used.
+ t12, err := namer.pickName("type/1", "")
+ require.NoError(t, err)
+ assert.Equal(t, "type-1-2", t12)
+
+ // First name in sequence for new service type is used.
+ t21, err := namer.pickName("type/2", "")
+ require.NoError(t, err)
+ assert.Equal(t, "type-2-1", t21)
+ })
+
+ t.Run("reserved name", func(t *testing.T) {
+ namer := newServiceNamer()
+
+ _, err := namer.pickName("foo", "heartbeat")
+ require.ErrorContains(t, err, `service name "heartbeat" is reserved for internal use`)
+ })
+
+ t.Run("invalid name", func(t *testing.T) {
+ namer := newServiceNamer()
+
+ _, err := namer.pickName("foo", "hello, world")
+ require.ErrorContains(t, err, `invalid service name: "hello, world", may only contain lowercase letters, numbers, hyphens, underscores, or plus symbols`)
+ })
+
+ t.Run("named used twice", func(t *testing.T) {
+ namer := newServiceNamer()
+
+ _, err := namer.pickName("foo", "bar")
+ require.NoError(t, err)
+
+ _, err = namer.pickName("foo", "bar")
+ require.ErrorContains(t, err, `service name "bar" used more than once`)
+ })
+
+ t.Run("chosen name conflicts with generated name", func(t *testing.T) {
+ namer := newServiceNamer()
+
+ _, err := namer.pickName("foo", "foo-2")
+ require.NoError(t, err)
+
+ _, err = namer.pickName("foo", "")
+ require.ErrorContains(t, err, `service name "foo-2" conflicts with an automatically generated service name`)
+ })
+}
diff --git a/lib/tbot/internal/diagnostics/service.go b/lib/tbot/internal/diagnostics/service.go
index 95fcb7621e5de..39a46a5e1c137 100644
--- a/lib/tbot/internal/diagnostics/service.go
+++ b/lib/tbot/internal/diagnostics/service.go
@@ -36,9 +36,10 @@ import (
// ServiceBuilder returns a builder for the diagnostics service.
func ServiceBuilder(cfg Config) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
return NewService(cfg, deps.StatusRegistry)
}
+ return bot.NewServiceBuilder("internal/diagnostics", "diagnostics", buildFn)
}
// Config contains configuration for the diagnostics service.
@@ -59,7 +60,7 @@ func (cfg *Config) CheckAndSetDefaults() error {
}
// NewService creates a new diagnostics service.
-func NewService(cfg Config, registry *readyz.Registry) (*Service, error) {
+func NewService(cfg Config, registry readyz.ReadOnlyRegistry) (*Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -80,7 +81,7 @@ type Service struct {
log *slog.Logger
diagAddr string
pprofEnabled bool
- statusRegistry *readyz.Registry
+ statusRegistry readyz.ReadOnlyRegistry
}
func (s *Service) String() string {
diff --git a/lib/tbot/internal/heartbeat/service.go b/lib/tbot/internal/heartbeat/service.go
index a868ca1e804fe..936065c0f41a7 100644
--- a/lib/tbot/internal/heartbeat/service.go
+++ b/lib/tbot/internal/heartbeat/service.go
@@ -23,10 +23,10 @@ import (
"log/slog"
"os"
"runtime"
+ "sort"
"time"
"github.com/gravitational/trace"
- "github.com/jonboulle/clockwork"
"google.golang.org/grpc"
"google.golang.org/protobuf/types/known/durationpb"
"google.golang.org/protobuf/types/known/timestamppb"
@@ -38,6 +38,16 @@ import (
"github.com/gravitational/teleport/lib/tbot/readyz"
)
+const (
+ // Maximum amount of time we'll wait for services to report their initial
+ // status before sending the first heartbeat.
+ serviceHealthMaxWait = 30 * time.Second
+
+ // Maximum amount of time the one-shot heartbeat can take once the bot has
+ // started shutting down.
+ shutdownHeartbeatTimeout = 5 * time.Second
+)
+
// Client for the heartbeat service.
type Client interface {
SubmitHeartbeat(
@@ -76,8 +86,9 @@ type Config struct {
// StatusReporter is used to report the service's health.
StatusReporter readyz.Reporter
- // Clock that will be used to determine the current time.
- Clock clockwork.Clock
+ // StatusRegistry is used to fetch the current service statuses when
+ // submitting a heartbeat.
+ StatusRegistry *readyz.Registry
}
// CheckAndSetDefaults checks the service configuration and sets any default values.
@@ -91,12 +102,11 @@ func (cfg *Config) CheckAndSetDefaults() error {
return trace.BadParameter("Client is required")
case cfg.JoinMethod == "":
return trace.BadParameter("JoinMethod is required")
- }
- if cfg.Clock == nil {
- cfg.Clock = clockwork.NewRealClock()
+ case cfg.StatusRegistry == nil:
+ return trace.BadParameter("StatusRegistry is required")
}
if cfg.StartedAt.IsZero() {
- cfg.StartedAt = cfg.Clock.Now()
+ cfg.StartedAt = time.Now()
}
return nil
}
@@ -114,6 +124,13 @@ type Service struct{ cfg Config }
// Run the service in long-running mode, submitting heartbeats periodically.
func (s *Service) Run(ctx context.Context) error {
+ // Wait for service health before sending our first heartbeat. Otherwise, we
+ // might report all services as "initializing" for the first ~30 minutes our
+ // bot is running.
+ if shuttingDown := s.waitForServiceHealth(ctx); shuttingDown {
+ return nil
+ }
+
isStartup := true
err := internal.RunOnInterval(ctx, internal.RunOnIntervalConfig{
Service: s.String(),
@@ -146,6 +163,21 @@ func (s *Service) Run(ctx context.Context) error {
// OneShot submits one heartbeat and then exits.
func (s *Service) OneShot(ctx context.Context) error {
+ // Wait for services to report their health before sending the heartbeat.
+ shuttingDown := s.waitForServiceHealth(ctx)
+
+ if shuttingDown {
+ // If the outer context has been canceled (likely because another
+ // service has return an error) we'll create a new one detached from
+ // the cancellation to try to send the heartbeat.
+ var cancel context.CancelFunc
+ ctx, cancel = context.WithTimeout(
+ context.WithoutCancel(ctx),
+ shutdownHeartbeatTimeout,
+ )
+ defer cancel()
+ }
+
err := s.heartbeat(ctx, true, true)
// Ignore not implemented as this is likely confusing.
// TODO(noah): Remove NotImplemented check at V18 assuming V17 first major
@@ -159,6 +191,24 @@ func (s *Service) OneShot(ctx context.Context) error {
// String implements fmt.Stringer.
func (s *Service) String() string { return "heartbeat" }
+func (s *Service) waitForServiceHealth(ctx context.Context) (shuttingDown bool) {
+ // We must report our own status to avoid blocking ourselves!
+ s.cfg.StatusReporter.Report(readyz.Healthy)
+
+ select {
+ case <-s.cfg.StatusRegistry.AllServicesReported():
+ // All services have reported their status, we're ready!
+ return false
+ case <-time.After(serviceHealthMaxWait):
+ // It's taking too long, give up and start sending heartbeats.
+ return false
+ case <-ctx.Done():
+ // The outer context has been canceled (e.g. another service has exited
+ // or the process has received SIGINT).
+ return true
+ }
+}
+
func (s *Service) heartbeat(ctx context.Context, isOneShot, isStartup bool) error {
s.cfg.Logger.DebugContext(ctx, "Sending heartbeat")
hostName, err := os.Hostname()
@@ -166,7 +216,7 @@ func (s *Service) heartbeat(ctx context.Context, isOneShot, isStartup bool) erro
s.cfg.Logger.WarnContext(ctx, "Failed to determine hostname for heartbeat", "error", err)
}
- now := s.cfg.Clock.Now()
+ now := time.Now()
hb := &machineidv1pb.BotInstanceStatusHeartbeat{
RecordedAt: timestamppb.New(now),
Hostname: hostName,
@@ -180,8 +230,20 @@ func (s *Service) heartbeat(ctx context.Context, isOneShot, isStartup bool) erro
Kind: s.cfg.BotKind,
}
+ status := s.cfg.StatusRegistry.OverallStatus()
+ serviceHealth := make([]*machineidv1pb.BotInstanceServiceHealth, 0, len(status.Services))
+ for name, serviceStatus := range status.Services {
+ serviceHealth = append(serviceHealth, statusToServiceHealth(name, serviceStatus))
+ }
+
+ // Sort the services by name to make tests deterministic.
+ sort.Slice(serviceHealth, func(a, b int) bool {
+ return serviceHealth[a].Service.Name < serviceHealth[b].Service.Name
+ })
+
_, err = s.cfg.Client.SubmitHeartbeat(ctx, &machineidv1pb.SubmitHeartbeatRequest{
- Heartbeat: hb,
+ Heartbeat: hb,
+ ServiceHealth: serviceHealth,
})
if err != nil {
return trace.Wrap(err, "submitting heartbeat")
@@ -190,3 +252,49 @@ func (s *Service) heartbeat(ctx context.Context, isOneShot, isStartup bool) erro
s.cfg.Logger.InfoContext(ctx, "Sent heartbeat", "data", hb.String())
return nil
}
+
+func statusToServiceHealth(name string, status *readyz.ServiceStatus) *machineidv1pb.BotInstanceServiceHealth {
+ health := &machineidv1pb.BotInstanceServiceHealth{
+ Service: &machineidv1pb.BotInstanceServiceIdentifier{
+ Name: trimString(name, 64),
+ Type: status.ServiceType,
+ },
+ }
+
+ switch status.Status {
+ case readyz.Initializing:
+ health.Status = machineidv1pb.BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_INITIALIZING
+ case readyz.Healthy:
+ health.Status = machineidv1pb.BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_HEALTHY
+ case readyz.Unhealthy:
+ health.Status = machineidv1pb.BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_UNHEALTHY
+ }
+
+ if status.Reason != "" {
+ reason := trimString(status.Reason, 256)
+ health.Reason = &reason
+ }
+
+ if status.UpdatedAt != nil {
+ health.UpdatedAt = timestamppb.New(*status.UpdatedAt)
+ }
+
+ return health
+}
+
+func trimString(s string, maxBytes int) string {
+ if len(s) <= maxBytes {
+ return s
+ }
+
+ // Trim the string to maxBytes, honoring rune boundaries for non-ASCII text.
+ byteCount := 0
+ for i, r := range s {
+ runeSize := len(string(r))
+ if byteCount+runeSize > maxBytes {
+ return s[:i]
+ }
+ byteCount += runeSize
+ }
+ return s
+}
diff --git a/lib/tbot/internal/heartbeat/service_test.go b/lib/tbot/internal/heartbeat/service_test.go
index 8dac3c6f241ee..272116ec554fc 100644
--- a/lib/tbot/internal/heartbeat/service_test.go
+++ b/lib/tbot/internal/heartbeat/service_test.go
@@ -22,11 +22,12 @@ import (
"context"
"os"
"runtime"
+ "strings"
"testing"
+ "testing/synctest"
"time"
"github.com/google/go-cmp/cmp"
- "github.com/jonboulle/clockwork"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/grpc"
@@ -37,6 +38,7 @@ import (
"github.com/gravitational/teleport"
machineidv1pb "github.com/gravitational/teleport/api/gen/proto/go/teleport/machineid/v1"
"github.com/gravitational/teleport/api/types"
+ "github.com/gravitational/teleport/lib/tbot/readyz"
"github.com/gravitational/teleport/lib/utils"
)
@@ -54,56 +56,128 @@ func (f *fakeHeartbeatSubmitter) SubmitHeartbeat(
func TestHeartbeatService(t *testing.T) {
t.Parallel()
- log := utils.NewSlogLoggerForTests()
- ctx, cancel := context.WithCancel(context.Background())
- t.Cleanup(cancel)
-
- fhs := &fakeHeartbeatSubmitter{
- ch: make(chan *machineidv1pb.SubmitHeartbeatRequest, 2),
- }
-
- now := time.Date(2024, time.April, 1, 12, 0, 0, 0, time.UTC)
- svc, err := NewService(Config{
- Interval: time.Second,
- RetryLimit: 3,
- Client: fhs,
- Clock: clockwork.NewFakeClockAt(now),
- StartedAt: time.Date(2024, time.April, 1, 11, 0, 0, 0, time.UTC),
- Logger: log,
- JoinMethod: types.JoinMethodGitHub,
- BotKind: machineidv1pb.BotKind_BOT_KIND_TBOT,
+ synctest.Run(func() {
+ log := utils.NewSlogLoggerForTests()
+ ctx, cancel := context.WithCancel(t.Context())
+ t.Cleanup(cancel)
+
+ fhs := &fakeHeartbeatSubmitter{
+ ch: make(chan *machineidv1pb.SubmitHeartbeatRequest, 2),
+ }
+
+ reg := readyz.NewRegistry()
+
+ svcA := reg.AddService("a", "a")
+ svcB := reg.AddService("b", strings.Repeat("b", 200))
+
+ svc, err := NewService(Config{
+ Interval: time.Second,
+ RetryLimit: 3,
+ Client: fhs,
+ StartedAt: time.Now().Add(-1 * time.Hour),
+ Logger: log,
+ JoinMethod: types.JoinMethodGitHub,
+ StatusReporter: reg.AddService("internal/heartbeat", "heartbeat"),
+ StatusRegistry: reg,
+ BotKind: machineidv1pb.BotKind_BOT_KIND_TBOT,
+ })
+ require.NoError(t, err)
+
+ hostName, err := os.Hostname()
+ require.NoError(t, err)
+
+ errCh := make(chan error, 1)
+ go func() {
+ errCh <- svc.Run(ctx)
+ }()
+
+ synctest.Wait()
+ select {
+ case <-fhs.ch:
+ t.Fatal("should not have received a heartbeat until all services have reported their status")
+ default:
+ }
+
+ svcA.ReportReason(readyz.Unhealthy, "no more bananas")
+ svcB.ReportReason(readyz.Unhealthy, strings.Repeat("b", 300))
+
+ want := &machineidv1pb.SubmitHeartbeatRequest{
+ Heartbeat: &machineidv1pb.BotInstanceStatusHeartbeat{
+ Hostname: hostName,
+ IsStartup: true,
+ OneShot: false,
+ Uptime: durationpb.New(time.Hour),
+ Version: teleport.Version,
+ Architecture: runtime.GOARCH,
+ Os: runtime.GOOS,
+ JoinMethod: string(types.JoinMethodGitHub),
+ Kind: machineidv1pb.BotKind_BOT_KIND_TBOT,
+ },
+ ServiceHealth: []*machineidv1pb.BotInstanceServiceHealth{
+ {
+ Service: &machineidv1pb.BotInstanceServiceIdentifier{
+ Name: "a",
+ Type: "a",
+ },
+ Reason: ptr("no more bananas"),
+ Status: machineidv1pb.BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_UNHEALTHY,
+ UpdatedAt: timestamppb.New(time.Now()),
+ },
+ // Check limits were applied on user-controlled or dynamic fields.
+ {
+ Service: &machineidv1pb.BotInstanceServiceIdentifier{
+ Name: strings.Repeat("b", 64),
+ Type: "b",
+ },
+ Reason: ptr(strings.Repeat("b", 256)),
+ Status: machineidv1pb.BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_UNHEALTHY,
+ UpdatedAt: timestamppb.New(time.Now()),
+ },
+ {
+ Service: &machineidv1pb.BotInstanceServiceIdentifier{
+ Name: "heartbeat",
+ Type: "internal/heartbeat",
+ },
+ Status: machineidv1pb.BotInstanceHealthStatus_BOT_INSTANCE_HEALTH_STATUS_HEALTHY,
+ UpdatedAt: timestamppb.New(time.Now()),
+ },
+ },
+ }
+
+ compare := func(t *testing.T, want, got *machineidv1pb.SubmitHeartbeatRequest) {
+ t.Helper()
+
+ assert.Empty(t,
+ cmp.Diff(want, got,
+ protocmp.Transform(),
+ protocmp.IgnoreFields(&machineidv1pb.BotInstanceStatusHeartbeat{}, "recorded_at"),
+ ),
+ )
+ }
+
+ synctest.Wait()
+ select {
+ case got := <-fhs.ch:
+ compare(t, want, got)
+ default:
+ t.Fatal("no heartbeat received")
+ }
+
+ time.Sleep(1 * time.Second)
+ synctest.Wait()
+
+ select {
+ case got := <-fhs.ch:
+ want.Heartbeat.IsStartup = false
+ want.Heartbeat.Uptime = durationpb.New(time.Hour + time.Second)
+ compare(t, want, got)
+ default:
+ t.Fatal("no heartbeat received")
+ }
+
+ cancel()
+ assert.NoError(t, <-errCh)
})
- require.NoError(t, err)
-
- hostName, err := os.Hostname()
- require.NoError(t, err)
-
- errCh := make(chan error, 1)
- go func() {
- errCh <- svc.Run(ctx)
- }()
-
- got := <-fhs.ch
- want := &machineidv1pb.SubmitHeartbeatRequest{
- Heartbeat: &machineidv1pb.BotInstanceStatusHeartbeat{
- RecordedAt: timestamppb.New(now),
- Hostname: hostName,
- IsStartup: true,
- OneShot: false,
- Uptime: durationpb.New(time.Hour),
- Version: teleport.Version,
- Architecture: runtime.GOARCH,
- Os: runtime.GOOS,
- JoinMethod: string(types.JoinMethodGitHub),
- Kind: machineidv1pb.BotKind_BOT_KIND_TBOT,
- },
- }
- assert.Empty(t, cmp.Diff(want, got, protocmp.Transform()))
-
- got = <-fhs.ch
- want.Heartbeat.IsStartup = false
- assert.Empty(t, cmp.Diff(want, got, protocmp.Transform()))
-
- cancel()
- assert.NoError(t, <-errCh)
}
+
+func ptr[T any](v T) *T { return &v }
diff --git a/lib/tbot/internal/identity/service.go b/lib/tbot/internal/identity/service.go
index f5330e0e21f3d..2282ca53c55b9 100644
--- a/lib/tbot/internal/identity/service.go
+++ b/lib/tbot/internal/identity/service.go
@@ -372,7 +372,6 @@ func (s *Service) Initialize(ctx context.Context) error {
s.mu.Unlock()
s.unblockWaiters()
- s.cfg.StatusReporter.Report(readyz.Healthy)
s.log.InfoContext(ctx, "Identity initialized successfully")
return nil
@@ -401,8 +400,10 @@ func (s *Service) Run(ctx context.Context) error {
// etc)
storageDestination := s.cfg.Destination
- // Keep retrying renewal if it failed on startup.
- if !s.IsReady() {
+ if s.IsReady() {
+ s.cfg.StatusReporter.Report(readyz.Healthy)
+ } else {
+ // Keep retrying renewal if it failed on startup.
retry, err := retryutils.NewRetryV2(retryutils.RetryV2Config{
Driver: retryutils.NewExponentialDriver(1 * time.Second),
Max: 1 * time.Minute,
diff --git a/lib/tbot/readyz/http.go b/lib/tbot/readyz/http.go
index f2a9472beec59..139c88b0c3490 100644
--- a/lib/tbot/readyz/http.go
+++ b/lib/tbot/readyz/http.go
@@ -26,9 +26,16 @@ import (
"net/http"
)
+// ReadOnlyRegistry is a version of the Registry which can only be read from,
+// not reported to.
+type ReadOnlyRegistry interface {
+ ServiceStatus(name string) (*ServiceStatus, bool)
+ OverallStatus() *OverallStatus
+}
+
// HTTPHandler returns an HTTP handler that implements tbot's
// /readyz(/{service}) endpoints.
-func HTTPHandler(reg *Registry) http.Handler {
+func HTTPHandler(reg ReadOnlyRegistry) http.Handler {
mux := http.NewServeMux()
mux.Handle("/readyz/{service}", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
diff --git a/lib/tbot/readyz/readyz.go b/lib/tbot/readyz/readyz.go
index 4462c7c1fe4bc..71881c1dfd3f8 100644
--- a/lib/tbot/readyz/readyz.go
+++ b/lib/tbot/readyz/readyz.go
@@ -18,36 +18,69 @@
package readyz
-import "sync"
+import (
+ "sync"
+ "time"
+
+ "github.com/jonboulle/clockwork"
+)
// NewRegistry returns a Registry to track the health of tbot's services.
-func NewRegistry() *Registry {
- return &Registry{
+func NewRegistry(opts ...NewRegistryOpt) *Registry {
+ r := &Registry{
+ clock: clockwork.NewRealClock(),
services: make(map[string]*ServiceStatus),
+ notifyCh: make(chan struct{}),
+ }
+ for _, opt := range opts {
+ opt(r)
}
+ return r
+}
+
+// NewRegistryOpt can be passed to NewRegistry to provide optional configuration.
+type NewRegistryOpt func(r *Registry)
+
+// WithClock sets the registry's clock.
+func WithClock(clock clockwork.Clock) NewRegistryOpt {
+ return func(r *Registry) { r.clock = clock }
}
// Registry tracks the status/health of tbot's services.
type Registry struct {
+ clock clockwork.Clock
+
mu sync.Mutex
services map[string]*ServiceStatus
+ reported int
+ notifyCh chan struct{}
}
// AddService adds a service to the registry so that its health will be reported
// from our readyz endpoints. It returns a Reporter the service can use to report
// status changes.
-func (r *Registry) AddService(name string) Reporter {
+//
+// Note: you should add all of your services before any service reports its status
+// otherwise AllServicesReported will unblock too early.
+func (r *Registry) AddService(serviceType, name string) Reporter {
r.mu.Lock()
defer r.mu.Unlock()
+ // TODO(boxofrad): If you add the same service multiple times, you could end
+ // up unblocking AllServicesReported prematurely. The impact is low, it just
+ // means we'd send a heartbeat sooner than is desirable, but we should panic
+ // or return an error from this method instead.
status, ok := r.services[name]
if !ok {
- status = &ServiceStatus{}
+ status = &ServiceStatus{ServiceType: serviceType}
r.services[name] = status
}
+
return &reporter{
mu: &r.mu,
+ clock: r.clock,
status: status,
+ notify: sync.OnceFunc(r.maybeNotifyLocked),
}
}
@@ -87,6 +120,31 @@ func (r *Registry) OverallStatus() *OverallStatus {
}
}
+// AllServicesReported returns a channel you can receive from to be notified
+// when all registered services have reported their initial status. It provides
+// a way for us to hold the initial heartbeat until after the initial flurry of
+// activity.
+func (r *Registry) AllServicesReported() <-chan struct{} { return r.notifyCh }
+
+// maybeNotifyLocked unblocks the AllServicesReported channel if all services
+// have reported their initial status. It's called by each of the Reporters the
+// first time you report a status.
+//
+// Caller must be holding r.mu.
+func (r *Registry) maybeNotifyLocked() {
+ r.reported++
+
+ if r.reported != len(r.services) {
+ return
+ }
+
+ select {
+ case <-r.notifyCh:
+ default:
+ close(r.notifyCh)
+ }
+}
+
// ServiceStatus is a snapshot of the service's status.
type ServiceStatus struct {
// Status of the service.
@@ -94,6 +152,12 @@ type ServiceStatus struct {
// Reason string describing why the service has its current status.
Reason string `json:"reason,omitempty"`
+
+ // UpdatedAt is the time at which the service's status last changed.
+ UpdatedAt *time.Time `json:"updated_at"`
+
+ // ServiceType is exposed in bot heartbeats, but not the `/readyz` endpoint.
+ ServiceType string `json:"-"`
}
// Clone the status to avoid data races.
diff --git a/lib/tbot/readyz/readyz_test.go b/lib/tbot/readyz/readyz_test.go
index 40641ab9d4a01..4a43a0fec9126 100644
--- a/lib/tbot/readyz/readyz_test.go
+++ b/lib/tbot/readyz/readyz_test.go
@@ -23,7 +23,9 @@ import (
"net/http"
"net/http/httptest"
"testing"
+ "time"
+ "github.com/jonboulle/clockwork"
"github.com/stretchr/testify/require"
"github.com/gravitational/teleport/lib/tbot/readyz"
@@ -32,10 +34,13 @@ import (
func TestReadyz(t *testing.T) {
t.Parallel()
- reg := readyz.NewRegistry()
+ now := time.Now().UTC().Truncate(time.Second)
+ clock := clockwork.NewFakeClockAt(now)
+
+ reg := readyz.NewRegistry(readyz.WithClock(clock))
- a := reg.AddService("a")
- b := reg.AddService("b")
+ a := reg.AddService("svc", "a")
+ b := reg.AddService("svc", "b")
srv := httptest.NewServer(readyz.HTTPHandler(reg))
srv.URL = srv.URL + "/readyz"
@@ -79,8 +84,9 @@ func TestReadyz(t *testing.T) {
require.Equal(t,
readyz.ServiceStatus{
- Status: readyz.Unhealthy,
- Reason: "database is down",
+ Status: readyz.Unhealthy,
+ Reason: "database is down",
+ UpdatedAt: &now,
},
response,
)
@@ -104,8 +110,8 @@ func TestReadyz(t *testing.T) {
readyz.OverallStatus{
Status: readyz.Unhealthy,
Services: map[string]*readyz.ServiceStatus{
- "a": {Status: readyz.Healthy},
- "b": {Status: readyz.Unhealthy, Reason: "database is down"},
+ "a": {Status: readyz.Healthy, UpdatedAt: &now},
+ "b": {Status: readyz.Unhealthy, Reason: "database is down", UpdatedAt: &now},
},
},
response,
@@ -130,8 +136,8 @@ func TestReadyz(t *testing.T) {
readyz.OverallStatus{
Status: readyz.Healthy,
Services: map[string]*readyz.ServiceStatus{
- "a": {Status: readyz.Healthy},
- "b": {Status: readyz.Healthy},
+ "a": {Status: readyz.Healthy, UpdatedAt: &now},
+ "b": {Status: readyz.Healthy, UpdatedAt: &now},
},
},
response,
@@ -146,3 +152,32 @@ func TestReadyz(t *testing.T) {
require.Equal(t, http.StatusNotFound, rsp.StatusCode)
})
}
+
+func TestAllServicesReported(t *testing.T) {
+ reg := readyz.NewRegistry()
+
+ a := reg.AddService("svc", "a")
+ b := reg.AddService("svc", "b")
+
+ select {
+ case <-reg.AllServicesReported():
+ t.Fatal("AllServicesReported should be blocked")
+ default:
+ }
+
+ a.Report(readyz.Healthy)
+
+ select {
+ case <-reg.AllServicesReported():
+ t.Fatal("AllServicesReported should be blocked")
+ default:
+ }
+
+ b.Report(readyz.Unhealthy)
+
+ select {
+ case <-reg.AllServicesReported():
+ default:
+ t.Fatal("AllServicesReported should not be blocked")
+ }
+}
diff --git a/lib/tbot/readyz/reporter.go b/lib/tbot/readyz/reporter.go
index 3620bb96ef3fb..7a8d2ff8e7224 100644
--- a/lib/tbot/readyz/reporter.go
+++ b/lib/tbot/readyz/reporter.go
@@ -18,7 +18,11 @@
package readyz
-import "sync"
+import (
+ "sync"
+
+ "github.com/jonboulle/clockwork"
+)
// Reporter can be used by a service to report its status.
type Reporter interface {
@@ -32,6 +36,8 @@ type Reporter interface {
type reporter struct {
mu *sync.Mutex
status *ServiceStatus
+ clock clockwork.Clock
+ notify func()
}
func (r *reporter) Report(status Status) {
@@ -44,6 +50,11 @@ func (r *reporter) ReportReason(status Status, reason string) {
r.status.Status = status
r.status.Reason = reason
+
+ updatedAt := r.clock.Now()
+ r.status.UpdatedAt = &updatedAt
+
+ r.notify()
}
// NoopReporter returns a no-op Reporter that can be used when no real reporter
diff --git a/lib/tbot/services/application/output_config.go b/lib/tbot/services/application/output_config.go
index 773dde76b3729..a7e900b6a4f40 100644
--- a/lib/tbot/services/application/output_config.go
+++ b/lib/tbot/services/application/output_config.go
@@ -76,6 +76,11 @@ func (o *OutputConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *OutputConfig) SetName(name string) {
+ o.Name = name
+}
+
func (o *OutputConfig) GetDestination() destination.Destination {
return o.Destination
}
diff --git a/lib/tbot/services/application/output_service.go b/lib/tbot/services/application/output_service.go
index 4e619dca0eef0..77fc6e383c5e8 100644
--- a/lib/tbot/services/application/output_service.go
+++ b/lib/tbot/services/application/output_service.go
@@ -38,7 +38,7 @@ import (
)
func OutputServiceBuilder(cfg *OutputConfig, defaultCredentialLifetime bot.CredentialLifetime) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -51,11 +51,12 @@ func OutputServiceBuilder(cfg *OutputConfig, defaultCredentialLifetime bot.Crede
reloadCh: deps.ReloadCh,
identityGenerator: deps.IdentityGenerator,
clientBuilder: deps.ClientBuilder,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(OutputServiceType, cfg.Name, buildFn)
}
// OutputService generates the artifacts necessary to connect to a
diff --git a/lib/tbot/services/application/proxy_config.go b/lib/tbot/services/application/proxy_config.go
index 58382711d5af1..17c4942fb25ca 100644
--- a/lib/tbot/services/application/proxy_config.go
+++ b/lib/tbot/services/application/proxy_config.go
@@ -56,6 +56,11 @@ func (c *ProxyServiceConfig) GetName() string {
return c.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *ProxyServiceConfig) SetName(name string) {
+ o.Name = name
+}
+
// Type returns the type of the service.
func (c *ProxyServiceConfig) Type() string {
return ProxyServiceType
diff --git a/lib/tbot/services/application/proxy_service.go b/lib/tbot/services/application/proxy_service.go
index 53c638b0a9168..5a7ae14b41444 100644
--- a/lib/tbot/services/application/proxy_service.go
+++ b/lib/tbot/services/application/proxy_service.go
@@ -52,7 +52,7 @@ func ProxyServiceBuilder(
defaultCredentialLifetime bot.CredentialLifetime,
alpnUpgradeCache *internal.ALPNUpgradeCache,
) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -67,12 +67,12 @@ func ProxyServiceBuilder(
identityGenerator: deps.IdentityGenerator,
clientBuilder: deps.ClientBuilder,
alpnUpgradeCache: alpnUpgradeCache,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
-
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(ProxyServiceType, cfg.Name, buildFn)
}
// ProxyService presents a http_proxy compatible proxy on a listener which will
diff --git a/lib/tbot/services/application/tunnel_config.go b/lib/tbot/services/application/tunnel_config.go
index 4acf6df78fb6f..1882f3ed8969f 100644
--- a/lib/tbot/services/application/tunnel_config.go
+++ b/lib/tbot/services/application/tunnel_config.go
@@ -61,6 +61,11 @@ func (o *TunnelConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *TunnelConfig) SetName(name string) {
+ o.Name = name
+}
+
func (s *TunnelConfig) Type() string {
return TunnelServiceType
}
diff --git a/lib/tbot/services/application/tunnel_service.go b/lib/tbot/services/application/tunnel_service.go
index 073ab782ff795..8a7ad75c40276 100644
--- a/lib/tbot/services/application/tunnel_service.go
+++ b/lib/tbot/services/application/tunnel_service.go
@@ -45,7 +45,7 @@ func TunnelServiceBuilder(
connCfg connection.Config,
defaultCredentialLifetime bot.CredentialLifetime,
) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -59,11 +59,12 @@ func TunnelServiceBuilder(
cfg: cfg,
identityGenerator: deps.IdentityGenerator,
clientBuilder: deps.ClientBuilder,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(TunnelServiceType, cfg.Name, buildFn)
}
// TunnelService is a service that listens on a socket and forwards
diff --git a/lib/tbot/services/awsra/config.go b/lib/tbot/services/awsra/config.go
index c69517f58e1b4..179dc516cc2a5 100644
--- a/lib/tbot/services/awsra/config.go
+++ b/lib/tbot/services/awsra/config.go
@@ -102,6 +102,11 @@ func (o *Config) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *Config) SetName(name string) {
+ o.Name = name
+}
+
// Init initializes the destination.
func (o *Config) Init(ctx context.Context) error {
return trace.Wrap(o.Destination.Init(ctx, []string{}))
diff --git a/lib/tbot/services/awsra/service.go b/lib/tbot/services/awsra/service.go
index 3fa7dbd5c77d3..fa3df8bf8490d 100644
--- a/lib/tbot/services/awsra/service.go
+++ b/lib/tbot/services/awsra/service.go
@@ -49,7 +49,7 @@ import (
var tracer = otel.Tracer("github.com/gravitational/teleport/lib/tbot/services/awsra")
func ServiceBuilder(cfg *Config) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -60,11 +60,12 @@ func ServiceBuilder(cfg *Config) bot.ServiceBuilder {
reloadCh: deps.ReloadCh,
identityGenerator: deps.IdentityGenerator,
clientBuilder: deps.ClientBuilder,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(ServiceType, cfg.Name, buildFn)
}
// Service is a service that retrieves X.509 certificates and exchanges them for
diff --git a/lib/tbot/services/clientcredentials/config.go b/lib/tbot/services/clientcredentials/config.go
index 6b039b0187666..aebe229097428 100644
--- a/lib/tbot/services/clientcredentials/config.go
+++ b/lib/tbot/services/clientcredentials/config.go
@@ -61,6 +61,11 @@ func (o *UnstableConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *UnstableConfig) SetName(name string) {
+ o.Name = name
+}
+
// Ready returns a channel which closes when the Output is ready to be used
// as a client credential. Using this as a credential before Ready closes is
// unsupported.
diff --git a/lib/tbot/services/clientcredentials/service.go b/lib/tbot/services/clientcredentials/service.go
index 79bab6cbcae5f..5e7f66b1c1282 100644
--- a/lib/tbot/services/clientcredentials/service.go
+++ b/lib/tbot/services/clientcredentials/service.go
@@ -38,7 +38,7 @@ import (
// Note: when using the client credentials service to provide credentials to
// another service (e.g. the SPIFFE Workload API service) use NewSidecar instead.
func ServiceBuilder(cfg *UnstableConfig, credentialLifetime bot.CredentialLifetime) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -49,11 +49,12 @@ func ServiceBuilder(cfg *UnstableConfig, credentialLifetime bot.CredentialLifeti
cfg: cfg,
reloadCh: deps.ReloadCh,
identityGenerator: deps.IdentityGenerator,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(ServiceType, cfg.Name, buildFn)
}
// NewSidecar creates a client credential service intended to provide credentials
@@ -68,8 +69,8 @@ func NewSidecar(deps bot.ServiceDependencies, credentialLifetime bot.CredentialL
reloadCh: deps.ReloadCh,
identityGenerator: deps.IdentityGenerator,
statusReporter: readyz.NoopReporter(),
+ log: deps.Logger,
}
- svc.log = deps.LoggerForService(svc)
return svc, cfg
}
diff --git a/lib/tbot/services/database/output_config.go b/lib/tbot/services/database/output_config.go
index d48aba8d2970b..6d506107df829 100644
--- a/lib/tbot/services/database/output_config.go
+++ b/lib/tbot/services/database/output_config.go
@@ -104,6 +104,11 @@ func (o *OutputConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *OutputConfig) SetName(name string) {
+ o.Name = name
+}
+
func (o *OutputConfig) Init(ctx context.Context) error {
subDirs := []string{}
if o.Format == CockroachDatabaseFormat {
diff --git a/lib/tbot/services/database/output_service.go b/lib/tbot/services/database/output_service.go
index 1a1c045a9bf0e..fa95c65a303fd 100644
--- a/lib/tbot/services/database/output_service.go
+++ b/lib/tbot/services/database/output_service.go
@@ -38,7 +38,7 @@ import (
)
func OutputServiceBuilder(cfg *OutputConfig, defaultCredentialLifetime bot.CredentialLifetime) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -50,11 +50,12 @@ func OutputServiceBuilder(cfg *OutputConfig, defaultCredentialLifetime bot.Crede
reloadCh: deps.ReloadCh,
identityGenerator: deps.IdentityGenerator,
clientBuilder: deps.ClientBuilder,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(OutputServiceType, cfg.Name, buildFn)
}
// OutputService generates the artifacts necessary to connect to a
diff --git a/lib/tbot/services/database/tunnel_config.go b/lib/tbot/services/database/tunnel_config.go
index db1bf39380dc2..6f89611a7fc41 100644
--- a/lib/tbot/services/database/tunnel_config.go
+++ b/lib/tbot/services/database/tunnel_config.go
@@ -65,6 +65,11 @@ func (o *TunnelConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *TunnelConfig) SetName(name string) {
+ o.Name = name
+}
+
func (s *TunnelConfig) Type() string {
return TunnelServiceType
}
diff --git a/lib/tbot/services/database/tunnel_service.go b/lib/tbot/services/database/tunnel_service.go
index c152718fda570..db44db51b82ab 100644
--- a/lib/tbot/services/database/tunnel_service.go
+++ b/lib/tbot/services/database/tunnel_service.go
@@ -46,7 +46,7 @@ func TunnelServiceBuilder(
connCfg connection.Config,
defaultCredentialLifetime bot.CredentialLifetime,
) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -60,11 +60,12 @@ func TunnelServiceBuilder(
botIdentityReadyCh: deps.BotIdentityReadyCh,
identityGenerator: deps.IdentityGenerator,
clientBuilder: deps.ClientBuilder,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(TunnelServiceType, cfg.Name, buildFn)
}
// TunnelService is a service that listens on a local port and forwards
diff --git a/lib/tbot/services/example/config.go b/lib/tbot/services/example/config.go
index c04c1a2d1881e..174234e4f33ee 100644
--- a/lib/tbot/services/example/config.go
+++ b/lib/tbot/services/example/config.go
@@ -41,6 +41,11 @@ func (s *Config) GetName() string {
return s.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *Config) SetName(name string) {
+ o.Name = name
+}
+
func (s *Config) Type() string {
return ServiceType
}
diff --git a/lib/tbot/services/example/service.go b/lib/tbot/services/example/service.go
index 5117f1faec1df..4c4a00f3fcf9b 100644
--- a/lib/tbot/services/example/service.go
+++ b/lib/tbot/services/example/service.go
@@ -31,12 +31,13 @@ import (
// ServiceBuilder returns an example service builder.
func ServiceBuilder(cfg *Config) bot.ServiceBuilder {
- return func(bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
return &Service{cfg: cfg}, nil
}
+ return bot.NewServiceBuilder(ServiceType, cfg.Name, buildFn)
}
// Service is a temporary example service for testing purposes. It is not
diff --git a/lib/tbot/services/identity/output.go b/lib/tbot/services/identity/output.go
index 676a2dd26150e..e728caf8ee3cb 100644
--- a/lib/tbot/services/identity/output.go
+++ b/lib/tbot/services/identity/output.go
@@ -50,7 +50,7 @@ func OutputServiceBuilder(
defaultCredentialLifetime bot.CredentialLifetime,
insecure, fips bool,
) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -67,11 +67,12 @@ func OutputServiceBuilder(
proxyPinger: deps.ProxyPinger,
identityGenerator: deps.IdentityGenerator,
clientBuilder: deps.ClientBuilder,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(OutputServiceType, cfg.Name, buildFn)
}
// OutputService produces credentials which can be used to connect to
diff --git a/lib/tbot/services/identity/output_config.go b/lib/tbot/services/identity/output_config.go
index d6656d318c4b3..6adb4b48e6b49 100644
--- a/lib/tbot/services/identity/output_config.go
+++ b/lib/tbot/services/identity/output_config.go
@@ -101,6 +101,11 @@ func (o *OutputConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *OutputConfig) SetName(name string) {
+ o.Name = name
+}
+
func (o *OutputConfig) Init(ctx context.Context) error {
return trace.Wrap(o.Destination.Init(ctx, []string{}))
}
diff --git a/lib/tbot/services/k8s/argocd_output.go b/lib/tbot/services/k8s/argocd_output.go
index 3f9fe81a03fe9..ef7089838af19 100644
--- a/lib/tbot/services/k8s/argocd_output.go
+++ b/lib/tbot/services/k8s/argocd_output.go
@@ -52,7 +52,7 @@ import (
// ArgoCDServiceBuilder builds a new ArgoCDOutput.
func ArgoCDServiceBuilder(cfg *ArgoCDOutputConfig, opts ...ArgoCDServiceOption) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -66,6 +66,8 @@ func ArgoCDServiceBuilder(cfg *ArgoCDOutputConfig, opts ...ArgoCDServiceOption)
clientBuilder: deps.ClientBuilder,
reloadCh: deps.ReloadCh,
botIdentityReadyCh: deps.BotIdentityReadyCh,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
for _, opt := range opts {
@@ -81,15 +83,13 @@ func ArgoCDServiceBuilder(cfg *ArgoCDOutputConfig, opts ...ArgoCDServiceOption)
}
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
-
if svc.alpnUpgradeCache == nil {
svc.alpnUpgradeCache = internal.NewALPNUpgradeCache(svc.log)
}
return svc, nil
}
+ return bot.NewServiceBuilder(ArgoCDOutputServiceType, cfg.Name, buildFn)
}
// ArgoCDServiceOption is an option that can be provided to customize the service.
diff --git a/lib/tbot/services/k8s/argocd_output_config.go b/lib/tbot/services/k8s/argocd_output_config.go
index e4eeb513b095f..125ac55fa5f17 100644
--- a/lib/tbot/services/k8s/argocd_output_config.go
+++ b/lib/tbot/services/k8s/argocd_output_config.go
@@ -96,6 +96,11 @@ func (o *ArgoCDOutputConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *ArgoCDOutputConfig) SetName(name string) {
+ o.Name = name
+}
+
// CheckAndSetDefaults validates the service configuration and sets any default
// values.
func (o *ArgoCDOutputConfig) CheckAndSetDefaults() error {
diff --git a/lib/tbot/services/k8s/output_v1.go b/lib/tbot/services/k8s/output_v1.go
index e847cbf4402e7..086680dc1f337 100644
--- a/lib/tbot/services/k8s/output_v1.go
+++ b/lib/tbot/services/k8s/output_v1.go
@@ -52,7 +52,7 @@ import (
const defaultKubeconfigPath = "kubeconfig.yaml"
func OutputV1ServiceBuilder(cfg *OutputV1Config, opts ...OutputV1Option) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -66,14 +66,15 @@ func OutputV1ServiceBuilder(cfg *OutputV1Config, opts ...OutputV1Option) bot.Ser
executablePath: autoupdate.StableExecutable,
identityGenerator: deps.IdentityGenerator,
clientBuilder: deps.ClientBuilder,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
for _, opt := range opts {
opt.applyToV1Output(svc)
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(OutputV1ServiceType, cfg.Name, buildFn)
}
// OutputV1Option is an option that can be provided to customize the service.
diff --git a/lib/tbot/services/k8s/output_v1_config.go b/lib/tbot/services/k8s/output_v1_config.go
index 3cfd1313f74ba..993ba8029e608 100644
--- a/lib/tbot/services/k8s/output_v1_config.go
+++ b/lib/tbot/services/k8s/output_v1_config.go
@@ -65,6 +65,11 @@ func (o *OutputV1Config) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *OutputV1Config) SetName(name string) {
+ o.Name = name
+}
+
func (o *OutputV1Config) CheckAndSetDefaults() error {
if o.Destination == nil {
return trace.BadParameter("no destination configured for output")
diff --git a/lib/tbot/services/k8s/output_v2.go b/lib/tbot/services/k8s/output_v2.go
index a06a8dd8cf018..e44c6b4373749 100644
--- a/lib/tbot/services/k8s/output_v2.go
+++ b/lib/tbot/services/k8s/output_v2.go
@@ -52,7 +52,7 @@ import (
)
func OutputV2ServiceBuilder(cfg *OutputV2Config, opts ...OutputV2Option) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -66,14 +66,15 @@ func OutputV2ServiceBuilder(cfg *OutputV2Config, opts ...OutputV2Option) bot.Ser
executablePath: autoupdate.StableExecutable,
identityGenerator: deps.IdentityGenerator,
clientBuilder: deps.ClientBuilder,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
for _, opt := range opts {
opt.applyToV2Output(svc)
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(OutputV2ServiceType, cfg.Name, buildFn)
}
// OutputV1Option is an option that can be provided to customize the service.
diff --git a/lib/tbot/services/k8s/output_v2_config.go b/lib/tbot/services/k8s/output_v2_config.go
index 10d86cc1b0970..0002957724f9b 100644
--- a/lib/tbot/services/k8s/output_v2_config.go
+++ b/lib/tbot/services/k8s/output_v2_config.go
@@ -75,6 +75,11 @@ func (o *OutputV2Config) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *OutputV2Config) SetName(name string) {
+ o.Name = name
+}
+
func (o *OutputV2Config) CheckAndSetDefaults() error {
if o.Destination == nil {
return trace.BadParameter("no destination configured for output")
diff --git a/lib/tbot/services/legacyspiffe/svid_output.go b/lib/tbot/services/legacyspiffe/svid_output.go
index 12683ae09bce0..29414f3d28f94 100644
--- a/lib/tbot/services/legacyspiffe/svid_output.go
+++ b/lib/tbot/services/legacyspiffe/svid_output.go
@@ -49,7 +49,7 @@ func SVIDOutputServiceBuilder(
trustBundleCache TrustBundleGetter,
defaultCredentialLifetime bot.CredentialLifetime,
) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -61,11 +61,12 @@ func SVIDOutputServiceBuilder(
identityGenerator: deps.IdentityGenerator,
clientBuilder: deps.ClientBuilder,
trustBundleCache: trustBundleCache,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(SVIDOutputServiceType, cfg.Name, buildFn)
}
// SVIDOutputService is a service that generates and writes X509 SPIFFE
diff --git a/lib/tbot/services/legacyspiffe/svid_output_config.go b/lib/tbot/services/legacyspiffe/svid_output_config.go
index 667a835aa21c0..663968bb987ab 100644
--- a/lib/tbot/services/legacyspiffe/svid_output_config.go
+++ b/lib/tbot/services/legacyspiffe/svid_output_config.go
@@ -126,6 +126,11 @@ func (o *SVIDOutputConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *SVIDOutputConfig) SetName(name string) {
+ o.Name = name
+}
+
// Init initializes the destination.
func (o *SVIDOutputConfig) Init(ctx context.Context) error {
return trace.Wrap(o.Destination.Init(ctx, []string{}))
diff --git a/lib/tbot/services/legacyspiffe/workload_api.go b/lib/tbot/services/legacyspiffe/workload_api.go
index 6f6c1a4a9301a..159fbefe43839 100644
--- a/lib/tbot/services/legacyspiffe/workload_api.go
+++ b/lib/tbot/services/legacyspiffe/workload_api.go
@@ -68,7 +68,7 @@ func WorkloadAPIServiceBuilder(
trustBundleCache TrustBundleGetter,
defaultCredentialLifetime bot.CredentialLifetime,
) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -79,11 +79,12 @@ func WorkloadAPIServiceBuilder(
cfg: cfg,
trustBundleCache: trustBundleCache,
clientBuilder: deps.ClientBuilder,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return bot.NewServicePair(svc, sidecar), nil
}
+ return bot.NewServiceBuilder(WorkloadAPIServiceType, cfg.Name, buildFn)
}
type TrustBundleGetter interface {
diff --git a/lib/tbot/services/legacyspiffe/workload_api_config.go b/lib/tbot/services/legacyspiffe/workload_api_config.go
index f76f10321fbef..87f7b1b2f2450 100644
--- a/lib/tbot/services/legacyspiffe/workload_api_config.go
+++ b/lib/tbot/services/legacyspiffe/workload_api_config.go
@@ -140,6 +140,11 @@ func (o *WorkloadAPIConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *WorkloadAPIConfig) SetName(name string) {
+ o.Name = name
+}
+
func (s *WorkloadAPIConfig) Type() string {
return WorkloadAPIServiceType
}
diff --git a/lib/tbot/services/legacyspiffe/workload_api_test.go b/lib/tbot/services/legacyspiffe/workload_api_test.go
index dc86a757aa51c..68fe9e77a91a7 100644
--- a/lib/tbot/services/legacyspiffe/workload_api_test.go
+++ b/lib/tbot/services/legacyspiffe/workload_api_test.go
@@ -51,6 +51,7 @@ import (
"github.com/gravitational/teleport/lib/tbot/bot/destination"
"github.com/gravitational/teleport/lib/tbot/workloadidentity"
"github.com/gravitational/teleport/lib/tbot/workloadidentity/attrs"
+ "github.com/gravitational/teleport/lib/tbot/workloadidentity/workloadattest"
"github.com/gravitational/teleport/lib/utils"
libtestutils "github.com/gravitational/teleport/lib/utils/testutils"
"github.com/gravitational/teleport/tool/teleport/testenv"
@@ -507,7 +508,7 @@ func TestBotSPIFFEWorkloadAPI(t *testing.T) {
Onboarding: *onboarding,
InternalStorage: destination.NewMemory(),
Services: []bot.ServiceBuilder{
- trustBundleCache.BuildService,
+ trustBundleCache.Builder(),
WorkloadAPIServiceBuilder(
&WorkloadAPIConfig{
Listen: socketPath,
@@ -545,6 +546,11 @@ func TestBotSPIFFEWorkloadAPI(t *testing.T) {
},
},
},
+ Attestors: workloadattest.Config{
+ Unix: workloadattest.UnixAttestorConfig{
+ BinaryHashMaxSizeBytes: workloadattest.TestBinaryHashMaxBytes,
+ },
+ },
},
trustBundleCache,
bot.DefaultCredentialLifetime,
@@ -722,7 +728,7 @@ func Test_E2E_SPIFFE_SDS(t *testing.T) {
Onboarding: *onboarding,
InternalStorage: destination.NewMemory(),
Services: []bot.ServiceBuilder{
- trustBundleCache.BuildService,
+ trustBundleCache.Builder(),
WorkloadAPIServiceBuilder(
&WorkloadAPIConfig{
Listen: socketPath,
@@ -760,6 +766,11 @@ func Test_E2E_SPIFFE_SDS(t *testing.T) {
},
},
},
+ Attestors: workloadattest.Config{
+ Unix: workloadattest.UnixAttestorConfig{
+ BinaryHashMaxSizeBytes: workloadattest.TestBinaryHashMaxBytes,
+ },
+ },
},
trustBundleCache,
bot.DefaultCredentialLifetime,
diff --git a/lib/tbot/services/ssh/host_output.go b/lib/tbot/services/ssh/host_output.go
index 81390571c89ef..429531e5d1ec7 100644
--- a/lib/tbot/services/ssh/host_output.go
+++ b/lib/tbot/services/ssh/host_output.go
@@ -44,7 +44,7 @@ import (
)
func HostOutputServiceBuilder(cfg *HostOutputConfig, defaultCredentialLifetime bot.CredentialLifetime) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -56,11 +56,12 @@ func HostOutputServiceBuilder(cfg *HostOutputConfig, defaultCredentialLifetime b
reloadCh: deps.ReloadCh,
identityGenerator: deps.IdentityGenerator,
clientBuilder: deps.ClientBuilder,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(HostOutputServiceType, cfg.Name, buildFn)
}
type HostOutputService struct {
diff --git a/lib/tbot/services/ssh/host_output_config.go b/lib/tbot/services/ssh/host_output_config.go
index 4467a92741229..92f180b30e5db 100644
--- a/lib/tbot/services/ssh/host_output_config.go
+++ b/lib/tbot/services/ssh/host_output_config.go
@@ -68,6 +68,11 @@ func (o *HostOutputConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *HostOutputConfig) SetName(name string) {
+ o.Name = name
+}
+
func (o *HostOutputConfig) Init(ctx context.Context) error {
return trace.Wrap(o.Destination.Init(ctx, []string{}))
}
diff --git a/lib/tbot/services/ssh/multiplexer.go b/lib/tbot/services/ssh/multiplexer.go
index 9db7e1ac12f66..d5a736e9166bb 100644
--- a/lib/tbot/services/ssh/multiplexer.go
+++ b/lib/tbot/services/ssh/multiplexer.go
@@ -99,7 +99,7 @@ func MultiplexerServiceBuilder(
defaultCredentialLifetime bot.CredentialLifetime,
clientMetrics *grpcprom.ClientMetrics,
) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -115,11 +115,12 @@ func MultiplexerServiceBuilder(
reloadCh: deps.ReloadCh,
identityGenerator: deps.IdentityGenerator,
clientBuilder: deps.ClientBuilder,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(MultiplexerServiceType, cfg.Name, buildFn)
}
// MultiplexerService is a long-lived local SSH proxy. It listens on a local Unix
diff --git a/lib/tbot/services/ssh/multiplexer_config.go b/lib/tbot/services/ssh/multiplexer_config.go
index 36da4466da037..1aa479f918153 100644
--- a/lib/tbot/services/ssh/multiplexer_config.go
+++ b/lib/tbot/services/ssh/multiplexer_config.go
@@ -64,6 +64,11 @@ func (o *MultiplexerConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *MultiplexerConfig) SetName(name string) {
+ o.Name = name
+}
+
func (s *MultiplexerConfig) SessionResumptionEnabled() bool {
if s.EnableResumption == nil {
return true
diff --git a/lib/tbot/services/workloadidentity/jwt_output.go b/lib/tbot/services/workloadidentity/jwt_output.go
index 37ff41f125569..1cf4fc75c2ff4 100644
--- a/lib/tbot/services/workloadidentity/jwt_output.go
+++ b/lib/tbot/services/workloadidentity/jwt_output.go
@@ -42,7 +42,7 @@ func JWTOutputServiceBuilder(
trustBundleCache TrustBundleGetter,
defaultCredentialLifetime bot.CredentialLifetime,
) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -54,11 +54,12 @@ func JWTOutputServiceBuilder(
identityGenerator: deps.IdentityGenerator,
clientBuilder: deps.ClientBuilder,
trustBundleCache: trustBundleCache,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(JWTOutputServiceType, cfg.Name, buildFn)
}
// JWTOutputService is a service that retrieves JWT workload identity
diff --git a/lib/tbot/services/workloadidentity/jwt_output_config.go b/lib/tbot/services/workloadidentity/jwt_output_config.go
index 624ebd204cc20..0af5b54c4674e 100644
--- a/lib/tbot/services/workloadidentity/jwt_output_config.go
+++ b/lib/tbot/services/workloadidentity/jwt_output_config.go
@@ -53,6 +53,11 @@ func (o JWTOutputConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *JWTOutputConfig) SetName(name string) {
+ o.Name = name
+}
+
// Init initializes the destination.
func (o *JWTOutputConfig) Init(ctx context.Context) error {
return trace.Wrap(o.Destination.Init(ctx, []string{}))
diff --git a/lib/tbot/services/workloadidentity/workload_api.go b/lib/tbot/services/workloadidentity/workload_api.go
index 7e75fd3598c95..23a26f00d936c 100644
--- a/lib/tbot/services/workloadidentity/workload_api.go
+++ b/lib/tbot/services/workloadidentity/workload_api.go
@@ -64,7 +64,7 @@ func WorkloadAPIServiceBuilder(
crlCache CRLGetter,
defaultCredentialLifetime bot.CredentialLifetime,
) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -76,11 +76,12 @@ func WorkloadAPIServiceBuilder(
trustBundleCache: trustBundleCache,
crlCache: crlCache,
clientBuilder: deps.ClientBuilder,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return bot.NewServicePair(svc, sidecar), nil
}
+ return bot.NewServiceBuilder(WorkloadAPIServiceType, cfg.Name, buildFn)
}
// WorkloadAPIService implements a gRPC server that fulfills the SPIFFE
diff --git a/lib/tbot/services/workloadidentity/workload_api_config.go b/lib/tbot/services/workloadidentity/workload_api_config.go
index c438d255d70e9..d3e69fb89fc63 100644
--- a/lib/tbot/services/workloadidentity/workload_api_config.go
+++ b/lib/tbot/services/workloadidentity/workload_api_config.go
@@ -64,6 +64,11 @@ func (o *WorkloadAPIConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *WorkloadAPIConfig) SetName(name string) {
+ o.Name = name
+}
+
// Type returns the type of the service.
func (o *WorkloadAPIConfig) Type() string {
return WorkloadAPIServiceType
diff --git a/lib/tbot/services/workloadidentity/workload_api_test.go b/lib/tbot/services/workloadidentity/workload_api_test.go
index 64b50d60e1341..866cf4d71fd03 100644
--- a/lib/tbot/services/workloadidentity/workload_api_test.go
+++ b/lib/tbot/services/workloadidentity/workload_api_test.go
@@ -43,6 +43,7 @@ import (
"github.com/gravitational/teleport/lib/tbot/bot"
"github.com/gravitational/teleport/lib/tbot/bot/connection"
"github.com/gravitational/teleport/lib/tbot/workloadidentity"
+ "github.com/gravitational/teleport/lib/tbot/workloadidentity/workloadattest"
"github.com/gravitational/teleport/lib/utils"
"github.com/gravitational/teleport/tool/teleport/testenv"
)
@@ -130,14 +131,19 @@ func TestBotWorkloadIdentityAPI(t *testing.T) {
Logger: log,
Onboarding: *onboarding,
Services: []bot.ServiceBuilder{
- trustBundleCache.BuildService,
- crlCache.BuildService,
+ trustBundleCache.Builder(),
+ crlCache.Builder(),
WorkloadAPIServiceBuilder(
&WorkloadAPIConfig{
Selector: bot.WorkloadIdentitySelector{
Name: workloadIdentity.GetMetadata().GetName(),
},
Listen: listenAddr.String(),
+ Attestors: workloadattest.Config{
+ Unix: workloadattest.UnixAttestorConfig{
+ BinaryHashMaxSizeBytes: workloadattest.TestBinaryHashMaxBytes,
+ },
+ },
},
trustBundleCache,
crlCache,
diff --git a/lib/tbot/services/workloadidentity/x509_output.go b/lib/tbot/services/workloadidentity/x509_output.go
index d3b753e4458b9..bdc012d7ece06 100644
--- a/lib/tbot/services/workloadidentity/x509_output.go
+++ b/lib/tbot/services/workloadidentity/x509_output.go
@@ -55,7 +55,7 @@ func X509OutputServiceBuilder(
crlCache CRLGetter,
defaultCredentialLifetime bot.CredentialLifetime,
) bot.ServiceBuilder {
- return func(deps bot.ServiceDependencies) (bot.Service, error) {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
if err := cfg.CheckAndSetDefaults(); err != nil {
return nil, trace.Wrap(err)
}
@@ -68,11 +68,12 @@ func X509OutputServiceBuilder(
clientBuilder: deps.ClientBuilder,
trustBundleCache: trustBundleCache,
crlCache: crlCache,
+ log: deps.Logger,
+ statusReporter: deps.GetStatusReporter(),
}
- svc.log = deps.LoggerForService(svc)
- svc.statusReporter = deps.StatusRegistry.AddService(svc.String())
return svc, nil
}
+ return bot.NewServiceBuilder(X509OutputServiceType, cfg.Name, buildFn)
}
// X509OutputService is a service that retrieves X.509 certificates
diff --git a/lib/tbot/services/workloadidentity/x509_output_config.go b/lib/tbot/services/workloadidentity/x509_output_config.go
index ea32713bf5c83..cc2f7b186b0b2 100644
--- a/lib/tbot/services/workloadidentity/x509_output_config.go
+++ b/lib/tbot/services/workloadidentity/x509_output_config.go
@@ -54,6 +54,11 @@ func (o X509OutputConfig) GetName() string {
return o.Name
}
+// SetName sets the service's name to an automatically generated one.
+func (o *X509OutputConfig) SetName(name string) {
+ o.Name = name
+}
+
// Init initializes the destination.
func (o *X509OutputConfig) Init(ctx context.Context) error {
return trace.Wrap(o.Destination.Init(ctx, []string{}))
diff --git a/lib/tbot/tbot.go b/lib/tbot/tbot.go
index 9e8444f65cb58..e6567d0e150a3 100644
--- a/lib/tbot/tbot.go
+++ b/lib/tbot/tbot.go
@@ -173,15 +173,20 @@ func (b *Bot) Run(ctx context.Context) (err error) {
// This faux service allows us to get the bot's internal identity and client
// for tests, without exposing them on the core bot.Bot struct.
- services = append(services, func(deps bot.ServiceDependencies) (bot.Service, error) {
- b.mu.Lock()
- defer b.mu.Unlock()
+ if b.cfg.Testing {
+ services = append(services,
+ bot.NewServiceBuilder("internal/client-fetcher", "client-fetcher",
+ func(deps bot.ServiceDependencies) (bot.Service, error) {
+ b.mu.Lock()
+ defer b.mu.Unlock()
- b.identity = deps.BotIdentity
- b.client = deps.Client
+ b.identity = deps.BotIdentity
+ b.client = deps.Client
- return bot.NewNopService("client-fetcher"), nil
- })
+ return bot.NewNopService("client-fetcher"), nil
+ }),
+ )
+ }
// We only want to create this service if it's needed by a dependent
// service.
@@ -194,7 +199,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
return trustBundleCache
}
trustBundleCache = workloadidentity.NewTrustBundleCacheFacade()
- services = append(services, trustBundleCache.BuildService)
+ services = append(services, trustBundleCache.Builder())
return trustBundleCache
}
@@ -207,7 +212,7 @@ func (b *Bot) Run(ctx context.Context) (err error) {
return crlCache
}
crlCache = workloadidentity.NewCRLCacheFacade()
- services = append(services, crlCache.BuildService)
+ services = append(services, crlCache.Builder())
return crlCache
}
diff --git a/lib/tbot/tbot_test.go b/lib/tbot/tbot_test.go
index 6d7d53a1be100..fd336dad545f3 100644
--- a/lib/tbot/tbot_test.go
+++ b/lib/tbot/tbot_test.go
@@ -184,6 +184,7 @@ func defaultBotConfig(
// certs.
Insecure: opts.insecure,
Services: serviceConfigs,
+ Testing: true,
}
require.NoError(t, cfg.CheckAndSetDefaults())
@@ -1313,6 +1314,7 @@ func TestBotJoiningURI(t *testing.T) {
},
Oneshot: true,
Insecure: true,
+ Testing: true,
}
require.NoError(t, cfg.CheckAndSetDefaults())
diff --git a/lib/tbot/workloadidentity/crl_cache.go b/lib/tbot/workloadidentity/crl_cache.go
index 6c7dbd6fa214b..c8bcd22d50915 100644
--- a/lib/tbot/workloadidentity/crl_cache.go
+++ b/lib/tbot/workloadidentity/crl_cache.go
@@ -28,7 +28,6 @@ import (
"github.com/gravitational/trace"
- "github.com/gravitational/teleport"
workloadidentityv1pb "github.com/gravitational/teleport/api/gen/proto/go/teleport/workloadidentity/v1"
"github.com/gravitational/teleport/lib/tbot/bot"
"github.com/gravitational/teleport/lib/tbot/readyz"
@@ -83,29 +82,29 @@ type CRLCacheFacade struct {
crlCache *CRLCache
}
-// BuildService implements bot.ServiceBuilder to build the CRLCache once when the
-// bot starts up.
-func (f *CRLCacheFacade) BuildService(deps bot.ServiceDependencies) (bot.Service, error) {
- f.mu.Lock()
- defer f.mu.Unlock()
-
- if f.crlCache == nil {
- var err error
- f.crlCache, err = NewCRLCache(CRLCacheConfig{
- RevocationsClient: deps.Client.WorkloadIdentityRevocationServiceClient(),
- Logger: deps.Logger.With(
- teleport.ComponentKey,
- teleport.Component(teleport.ComponentTBot, "crl-cache"),
- ),
- StatusReporter: deps.StatusRegistry.AddService("crl-cache"),
- })
- if err != nil {
- return nil, trace.Wrap(err)
+// Builder returns a bot.ServiceBuilder to build the CRLCache once when the bot
+// starts up.
+func (f *CRLCacheFacade) Builder() bot.ServiceBuilder {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
+ f.mu.Lock()
+ defer f.mu.Unlock()
+
+ if f.crlCache == nil {
+ var err error
+ f.crlCache, err = NewCRLCache(CRLCacheConfig{
+ RevocationsClient: deps.Client.WorkloadIdentityRevocationServiceClient(),
+ Logger: deps.Logger,
+ StatusReporter: deps.GetStatusReporter(),
+ })
+ if err != nil {
+ return nil, trace.Wrap(err)
+ }
+ close(f.ready)
}
- close(f.ready)
- }
- return f.crlCache, nil
+ return f.crlCache, nil
+ }
+ return bot.NewServiceBuilder("internal/crl-cache", "crl-cache", buildFn)
}
func (m *CRLCacheFacade) GetCRLSet(ctx context.Context) (*CRLSet, error) {
diff --git a/lib/tbot/workloadidentity/trust_bundle_cache.go b/lib/tbot/workloadidentity/trust_bundle_cache.go
index 9d4b2cda1e279..608cf3cc52ba3 100644
--- a/lib/tbot/workloadidentity/trust_bundle_cache.go
+++ b/lib/tbot/workloadidentity/trust_bundle_cache.go
@@ -35,7 +35,6 @@ import (
"github.com/spiffe/go-spiffe/v2/spiffeid"
"go.opentelemetry.io/otel"
- "github.com/gravitational/teleport"
machineidv1pb "github.com/gravitational/teleport/api/gen/proto/go/teleport/machineid/v1"
trustv1 "github.com/gravitational/teleport/api/gen/proto/go/teleport/trust/v1"
"github.com/gravitational/teleport/api/types"
@@ -220,33 +219,37 @@ func NewTrustBundleCacheFacade() *TrustBundleCacheFacade {
return &TrustBundleCacheFacade{ready: make(chan struct{})}
}
-// BuildService implements bot.ServiceBuilder to build the TrustBundleCache once
-// when the bot starts up.
-func (f *TrustBundleCacheFacade) BuildService(deps bot.ServiceDependencies) (bot.Service, error) {
- f.mu.Lock()
- defer f.mu.Unlock()
-
- if f.bundleCache == nil {
- var err error
- f.bundleCache, err = NewTrustBundleCache(TrustBundleCacheConfig{
- FederationClient: deps.Client.SPIFFEFederationServiceClient(),
- TrustClient: deps.Client.TrustClient(),
- EventsClient: deps.Client,
- ClusterName: deps.BotIdentity().ClusterName,
- BotIdentityReadyCh: deps.BotIdentityReadyCh,
- Logger: deps.Logger.With(
- teleport.ComponentKey,
- teleport.Component(teleport.ComponentTBot, "spiffe-trust-bundle-cache"),
- ),
- StatusReporter: deps.StatusRegistry.AddService("spiffe-trust-bundle-cache"),
- })
- if err != nil {
- return nil, trace.Wrap(err)
+// Builder returns a bot.ServiceBuilder to build the TrustBundleCache when the
+// bot starts up.
+func (f *TrustBundleCacheFacade) Builder() bot.ServiceBuilder {
+ buildFn := func(deps bot.ServiceDependencies) (bot.Service, error) {
+ f.mu.Lock()
+ defer f.mu.Unlock()
+
+ if f.bundleCache == nil {
+ var err error
+ f.bundleCache, err = NewTrustBundleCache(TrustBundleCacheConfig{
+ FederationClient: deps.Client.SPIFFEFederationServiceClient(),
+ TrustClient: deps.Client.TrustClient(),
+ EventsClient: deps.Client,
+ ClusterName: deps.BotIdentity().ClusterName,
+ BotIdentityReadyCh: deps.BotIdentityReadyCh,
+ Logger: deps.Logger,
+ StatusReporter: deps.GetStatusReporter(),
+ })
+ if err != nil {
+ return nil, trace.Wrap(err)
+ }
+ close(f.ready)
}
- close(f.ready)
+ return f.bundleCache, nil
}
- return f.bundleCache, nil
+ return bot.NewServiceBuilder(
+ "internal/spiffe-trust-bundle-cache",
+ "spiffe-trust-bundle-cache",
+ buildFn,
+ )
}
func (f *TrustBundleCacheFacade) GetBundleSet(ctx context.Context) (*BundleSet, error) {
diff --git a/lib/tbot/workloadidentity/workloadattest/unix.go b/lib/tbot/workloadidentity/workloadattest/unix.go
index a7b0a2aaa8b04..6e12de5d16c5c 100644
--- a/lib/tbot/workloadidentity/workloadattest/unix.go
+++ b/lib/tbot/workloadidentity/workloadattest/unix.go
@@ -25,6 +25,7 @@ import (
"errors"
"io"
"log/slog"
+ "time"
"github.com/gravitational/trace"
"github.com/shirou/gopsutil/v4/process"
@@ -35,6 +36,18 @@ import (
// DefaultBinaryHashMaxBytes is default value for BinaryHashMaxSizeBytes.
const DefaultBinaryHashMaxBytes = 1 << 30 // 1GiB
+// TestBinaryHashMaxBytes is a more suitable value for BinaryHashMaxSizeBytes
+// in tests. Reading `/proc//exe` relies on inode stability, and in GitHub
+// Actions we've observed read stalls likely caused by overlayfs or debugfs.
+const TestBinaryHashMaxBytes = 1 << 12 // 4KiB
+
+// BinaryHashReadTimeout is the maximum amount of time we will wait to read
+// a process's executable to calculate its checksum. In normal circumstances
+// we should never reach this timeout, but reading `/proc//exe` depends
+// on inode stability, so it's possible to encounter read stalls if there is
+// a network or overlay filesystem involved.
+const BinaryHashReadTimeout = 15 * time.Second
+
// UnixAttestorConfig holds the configuration for the Unix workload attestor.
type UnixAttestorConfig struct {
// BinaryHashMaxSize is the maximum number of bytes that will be read from
@@ -149,22 +162,51 @@ func (a *UnixAttestor) Attest(ctx context.Context, pid int) (*workloadidentityv1
att.BinaryPath = &path
}
- exe, err := a.os.OpenExe(ctx, p)
+ if hash := a.hashBinary(ctx, p); hash != "" {
+ att.BinaryHash = &hash
+ }
+
+ return att, nil
+}
+
+func (a *UnixAttestor) hashBinary(ctx context.Context, proc *process.Process) string {
+ exe, err := a.os.OpenExe(ctx, proc)
if err != nil {
a.log.ErrorContext(ctx, "Failed to open workload executable for hashing", "error", err)
- return att, nil
+ return ""
}
defer func() { _ = exe.Close() }()
- hash := sha256.New()
- if _, err := copyAtMost(hash, exe, a.cfg.BinaryHashMaxSizeBytes); err != nil {
- a.log.ErrorContext(ctx, "Failed to hash workload executable", "error", err)
- return att, nil
+ // Read the workload executable to calculate a checksum. We do this in a
+ // goroutine in case of read stalls (e.g. due to the executable being on
+ // a network or overlay filesystem). If this happens, the goroutine will
+ // terminate when we close the file descriptor (see defer statement above).
+ type sumResult struct {
+ sum string
+ err error
+ }
+ resCh := make(chan sumResult, 1)
+ go func() {
+ hash := sha256.New()
+ if _, err := copyAtMost(hash, exe, a.cfg.BinaryHashMaxSizeBytes); err == nil {
+ resCh <- sumResult{sum: hex.EncodeToString(hash.Sum(nil))}
+ } else {
+ resCh <- sumResult{err: err}
+ }
+ }()
+
+ select {
+ case res := <-resCh:
+ if res.err != nil {
+ a.log.ErrorContext(ctx, "Failed to hash workload executable", "error", err)
+ }
+ return res.sum
+ case <-time.After(BinaryHashReadTimeout):
+ a.log.ErrorContext(ctx, "Timeout reading workload executable. If this happens frequently, it could be due to the workload executable being on a network or overlay filesystem, you may also consider lowering `attestors.unix.binary_hash_max_size_bytes`.")
+ case <-ctx.Done():
}
- sum := hex.EncodeToString(hash.Sum(nil))
- att.BinaryHash = &sum
- return att, nil
+ return ""
}
// copyAtMost copies at most n bytes from src to dst. If src contains more than