Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/pages/includes/metrics.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
| `teleport_audit_parquetlog_errors_from_collect_count` | counter | Teleport Audit Log | Number of collect failures in Parquet-format audit log. |
| `teleport_connected_resources` | gauge | Teleport Auth | Number and type of resources connected via keepalives. |
| `teleport_registered_servers` | gauge | Teleport Auth | The number of Teleport services that are connected to an Auth Service instance grouped by version. |
| `teleport_registered_servers_by_install_methods` | gauge | Teleport Auth | The number of Teleport services that are connected to an Auth Service instance grouped by install methods. |
| `user_login_total` | counter | Teleport Auth | Number of user logins. |
| `teleport_migrations` | gauge | Teleport Auth | Tracks for each migration if it is active (1) or not (0). |
| `watcher_event_sizes` | histogram | cache | Overall size of events emitted. |
Expand Down
38 changes: 38 additions & 0 deletions lib/auth/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,15 @@ var (
[]string{teleport.TagVersion},
)

// registeredAgentsInstallMethod is a gauge vector in the Teleport metric
// namespace with a single label, teleport.TagInstallMethods. It is reset and
// repopulated by (*Server).updateInstallMethodsMetrics.
registeredAgentsInstallMethod = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: teleport.MetricNamespace,
Name: teleport.MetricRegisteredServersByInstallMethods,
Help: "The number of Teleport services that are connected to an auth server by install method.",
},
[]string{teleport.TagInstallMethods},
)

migrations = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: teleport.MetricNamespace,
Expand Down Expand Up @@ -577,6 +586,7 @@ var (
registeredAgents, migrations,
totalInstancesMetric, enrolledInUpgradesMetric, upgraderCountsMetric,
accessRequestsCreatedMetric,
registeredAgentsInstallMethod,
}
)

Expand Down Expand Up @@ -1009,6 +1019,7 @@ func (a *Server) runPeriodicOperations() {
heartbeatsMissedByAuth.Set(float64(missedKeepAliveCount))
case <-promTicker.Next():
a.updateVersionMetrics()
a.updateInstallMethodsMetrics()
case <-releaseCheck.Next():
a.syncReleaseAlerts(ctx, true)
case <-localReleaseCheck.Next():
Expand Down Expand Up @@ -1291,6 +1302,33 @@ func (a *Server) updateVersionMetrics() {
}
}

// updateInstallMethodsMetrics walks the inventory control streams held by this
// auth server and exports, as prometheus gauges, how many instances report each
// combination of install methods. An instance that reports several methods is
// counted under one label built from its methods sorted and joined with commas;
// an instance that reports none is counted under "unknown".
// The figures only describe this auth server, so a cluster-wide view requires
// aggregating the metric across all auth instances.
func (a *Server) updateInstallMethodsMetrics() {
	countsByLabel := make(map[string]int)

	a.inventory.Iter(func(handle inventory.UpstreamHandle) {
		label := "unknown"
		if reported := handle.AgentMetadata().InstallMethods; len(reported) > 0 {
			// Sort a private copy so the label does not depend on the reported
			// order and the slice obtained from the handle is left untouched.
			sorted := make([]string, len(reported))
			copy(sorted, reported)
			slices.Sort(sorted)
			label = strings.Join(sorted, ",")
		}
		countsByLabel[label]++
	})

	// Clear the gauge vector first so that install-method labels that are no
	// longer observed disappear from the exported metrics.
	registeredAgentsInstallMethod.Reset()
	for label, n := range countsByLabel {
		registeredAgentsInstallMethod.WithLabelValues(label).Set(float64(n))
	}
}

var (
// remoteClusterRefreshLimit is the maximum number of backend updates that will be performed
// during periodic remote cluster connection status refresh.
Expand Down
2 changes: 2 additions & 0 deletions lib/inventory/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,8 @@ func (c *Controller) handleSSHServerHB(handle *upstreamHandle, sshServer *types.
}

func (c *Controller) handleAgentMetadata(handle *upstreamHandle, m proto.UpstreamInventoryAgentMetadata) {
handle.SetAgentMetadata(m)

svcs := make([]string, 0, len(handle.Hello().Services))
for _, svc := range handle.Hello().Services {
svcs = append(svcs, strings.ToLower(svc.String()))
Expand Down
61 changes: 61 additions & 0 deletions lib/inventory/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ package inventory

import (
"context"
"runtime"
"sync"
"testing"
"time"

"github.com/gravitational/trace"
"github.com/stretchr/testify/require"
"golang.org/x/exp/slices"

"github.com/gravitational/teleport"
"github.com/gravitational/teleport/api/client"
Expand Down Expand Up @@ -453,6 +455,65 @@ func TestUpdateLabels(t *testing.T) {
}, time.Second, 100*time.Millisecond)
}

// TestAgentMetadata checks that the agent metadata sent by an instance over the
// inventory control stream becomes readable from the controller-side handle.
func TestAgentMetadata(t *testing.T) {
	const (
		serverID = "test-instance"
		peerAddr = "1.2.3.4:456"
	)

	// Advertise an install method through the environment; the test expects to
	// see it reflected in the agent metadata below.
	t.Setenv("TELEPORT_INSTALL_METHOD_AWSOIDC_DEPLOYSERVICE", "true")

	events := make(chan testEvent, 1024)

	controller := NewController(
		&fakeAuth{},
		usagereporter.DiscardUsageReporter{},
		withInstanceHBInterval(time.Millisecond*200),
		withTestEventsChannel(events),
	)
	defer controller.Close()

	// Build the fake in-memory control stream and the hello messages for both ends.
	upstream, downstream := client.InventoryControlStreamPipe(client.ICSPipePeerAddr(peerAddr))
	agentHello := proto.UpstreamInventoryHello{
		ServerID: serverID,
		Version:  teleport.Version,
		Services: []types.SystemRole{types.RoleNode},
	}
	authHello := proto.DownstreamInventoryHello{
		Version:  teleport.Version,
		ServerID: "auth",
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// Start the downstream (agent) side on top of the pipe.
	NewDownstreamHandle(func(ctx context.Context) (client.DownstreamInventoryControlStream, error) {
		return downstream, nil
	}, agentHello)

	// The first message seen on the upstream side must be the agent's hello.
	select {
	case received := <-upstream.Recv():
		require.Equal(t, agentHello, received)
	case <-ctx.Done():
		require.Fail(t, "never got upstream hello")
	}
	require.NoError(t, upstream.Send(ctx, authHello))
	controller.RegisterControlStream(upstream, agentHello)

	// Once registered, the controller must expose a handle for the stream.
	handle, found := controller.GetControlStream(serverID)
	require.True(t, found)

	// Poll until the handle reports the expected install method and OS.
	wantMethods := []string{"awsoidc_deployservice"}
	require.Eventually(t, func() bool {
		md := handle.AgentMetadata()
		if md.OS != runtime.GOOS {
			return false
		}
		return slices.Equal(md.InstallMethods, wantMethods)
	}, 5*time.Second, 200*time.Millisecond)
}

type eventOpts struct {
expect map[testEvent]int
deny map[testEvent]struct{}
Expand Down
20 changes: 20 additions & 0 deletions lib/inventory/inventory.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,9 @@ type UpstreamHandle interface {
// Hello gets the cached upstream hello that was used to initialize the stream.
Hello() proto.UpstreamInventoryHello

// AgentMetadata is the service's metadata: OS, glibc version, install methods, ...
AgentMetadata() proto.UpstreamInventoryAgentMetadata

Ping(ctx context.Context, id uint64) (d time.Duration, err error)
// HasService is a helper for checking if a given service is associated with this
// stream.
Expand Down Expand Up @@ -496,6 +499,9 @@ type upstreamHandle struct {
client.UpstreamInventoryControlStream
hello proto.UpstreamInventoryHello

agentMDLock sync.RWMutex
agentMetadata proto.UpstreamInventoryAgentMetadata

ticker *interval.MultiInterval[intervalKey]

pingC chan pingRequest
Expand Down Expand Up @@ -572,6 +578,20 @@ func (h *upstreamHandle) Hello() proto.UpstreamInventoryHello {
return h.hello
}

// AgentMetadata returns the agent metadata currently stored on the handle
// (e.g. OS, glibc version, install methods). The read is performed while
// holding agentMDLock for reading.
func (h *upstreamHandle) AgentMetadata() proto.UpstreamInventoryAgentMetadata {
	h.agentMDLock.RLock()
	md := h.agentMetadata
	h.agentMDLock.RUnlock()
	return md
}

// SetAgentMetadata replaces the agent metadata stored on the handle with
// agentMD. The write is performed while holding agentMDLock exclusively.
func (h *upstreamHandle) SetAgentMetadata(agentMD proto.UpstreamInventoryAgentMetadata) {
	h.agentMDLock.Lock()
	h.agentMetadata = agentMD
	h.agentMDLock.Unlock()
}

func (h *upstreamHandle) HasService(service types.SystemRole) bool {
for _, s := range h.hello.Services {
if s == service {
Expand Down
9 changes: 9 additions & 0 deletions metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,10 @@ const (
// MetricRegisteredServers tracks the number of Teleport servers that have successfully registered with the Teleport cluster and have not reached the end of their ttl
MetricRegisteredServers = "registered_servers"

// MetricRegisteredServersByInstallMethods tracks the number of Teleport servers, grouped by installation method,
// that have successfully registered with the Teleport cluster and have not reached the end of their ttl.
MetricRegisteredServersByInstallMethods = "registered_servers_by_install_methods"

// MetricReverseSSHTunnels defines the number of connected SSH reverse tunnels to the proxy
MetricReverseSSHTunnels = "reverse_tunnels_connected"

Expand Down Expand Up @@ -268,6 +272,11 @@ const (

// TagClient is a prometheus label to indicate what client the metric is tied to
TagClient = "client"

// TagInstallMethods is a prometheus label to indicate what installation methods
// were used for the agent.
// This value comes from UpstreamInventoryAgentMetadata (sourced in lib/inventory/metadata.fetchInstallMethods).
TagInstallMethods = "install_methods"
)

const (
Expand Down