diff --git a/pkg/cluster/bridge.go b/pkg/cluster/bridge.go index 4b8442dafa..f4c32bfdae 100644 --- a/pkg/cluster/bridge.go +++ b/pkg/cluster/bridge.go @@ -119,7 +119,7 @@ func (c *gossipCluster) demoteFromBridge() { metrics.ClusterBridgeStatus.Set(0) metrics.ClusterBridgeTransitionsTotal.WithLabelValues("demoted").Inc() - metrics.ClusterMembersCount.WithLabelValues("wan").Set(0) + metrics.ClusterMembersCount.WithLabelValues("wan", c.config.Region).Set(0) // Leave and shutdown outside the lock since Leave can trigger callbacks if wan != nil { diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index fdc30b825d..55684d3493 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -116,10 +116,10 @@ func New(cfg Config) (Cluster, error) { c.mu.RUnlock() if lan != nil { - metrics.ClusterMembersCount.WithLabelValues("lan").Set(float64(lan.NumMembers())) + metrics.ClusterMembersCount.WithLabelValues("lan", c.config.Region).Set(float64(lan.NumMembers())) } if wan != nil { - metrics.ClusterMembersCount.WithLabelValues("wan").Set(float64(wan.NumMembers())) + metrics.ClusterMembersCount.WithLabelValues("wan", c.config.Region).Set(float64(wan.NumMembers())) } }) diff --git a/pkg/cluster/delegate_lan.go b/pkg/cluster/delegate_lan.go index c8dfe21b83..d83fcb4ab3 100644 --- a/pkg/cluster/delegate_lan.go +++ b/pkg/cluster/delegate_lan.go @@ -58,7 +58,7 @@ func (d *lanDelegate) NotifyMsg(data []byte) { if msg.SentAtMs > 0 { latency := time.Since(time.UnixMilli(msg.SentAtMs)).Seconds() if latency >= 0 { - metrics.ClusterMessageLatencySeconds.WithLabelValues(direction, msg.SourceRegion).Observe(latency) + metrics.ClusterMessageLatencySeconds.WithLabelValues(direction, msg.SourceRegion, d.cluster.config.Region).Observe(latency) } } diff --git a/pkg/cluster/delegate_wan.go b/pkg/cluster/delegate_wan.go index 8dd2b4c2b4..e01f151175 100644 --- a/pkg/cluster/delegate_wan.go +++ b/pkg/cluster/delegate_wan.go @@ -54,7 +54,7 @@ func (d *wanDelegate) NotifyMsg(data []byte) { if msg.SentAtMs > 0 { latency := time.Since(time.UnixMilli(msg.SentAtMs)).Seconds() if latency >= 0 { - metrics.ClusterMessageLatencySeconds.WithLabelValues("wan", msg.SourceRegion).Observe(latency) + metrics.ClusterMessageLatencySeconds.WithLabelValues("wan", msg.SourceRegion, d.cluster.config.Region).Observe(latency) } } diff --git a/pkg/cluster/metrics/prometheus.go b/pkg/cluster/metrics/prometheus.go index f1ff65d062..6a0ad436a8 100644 --- a/pkg/cluster/metrics/prometheus.go +++ b/pkg/cluster/metrics/prometheus.go @@ -29,7 +29,7 @@ var ( Name: "members_count", Help: "Current number of members in the cluster pool.", }, - []string{"pool"}, + []string{"pool", "region"}, ) // ClusterBridgeStatus indicates whether this node is currently the bridge (1) or not (0). @@ -91,6 +91,7 @@ var ( // ClusterMessageLatencySeconds measures end-to-end transport latency (sent_at_ms to now). // direction=lan gives intra-region hop time, direction=wan gives full cross-region delivery time. + // source_region is the originating region, destination_region is the receiving region. ClusterMessageLatencySeconds = promauto.NewHistogramVec( prometheus.HistogramOpts{ Namespace: "unkey", @@ -99,7 +100,7 @@ var ( Help: "End-to-end message transport latency in seconds.", Buckets: prometheus.DefBuckets, }, - []string{"direction", "source_region"}, + []string{"direction", "source_region", "destination_region"}, ) // ClusterMessageUnmarshalErrorsTotal counts proto deserialization failures.