Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
* [ENHANCEMENT] Distributor: Add native histograms max sample size bytes limit validation. #6834
* [ENHANCEMENT] Querier: Support caching parquet labels file in parquet queryable. #6835
* [ENHANCEMENT] Querier: Support query limits in parquet queryable. #6870
* [ENHANCEMENT] Ring: Add zone label to ring_members metric. #6900
* [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring. #6517
* [BUGFIX] Ingester: Fix labelset data race condition. #6573
* [BUGFIX] Compactor: Cleaner should not put deletion marker for blocks with no-compact marker. #6576
Expand Down
28 changes: 21 additions & 7 deletions pkg/ring/ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,8 @@ type Ring struct {

// List of zones for which there's at least 1 instance in the ring. This list is guaranteed
// to be sorted alphabetically.
ringZones []string
ringZones []string
previousRingZones []string

// Cache of shuffle-sharded subrings per identifier. Invalidated when topology changes.
// If set to nil, no caching is done (used by tests, and subrings).
Expand Down Expand Up @@ -262,7 +263,7 @@ func NewWithStoreClientAndStrategy(cfg Config, name, key string, store kv.Client
Name: "ring_members",
Help: "Number of members in the ring",
ConstLabels: map[string]string{"name": name}},
[]string{"state"}),
[]string{"state", "zone"}),
totalTokensGauge: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
Name: "ring_tokens_total",
Help: "Number of tokens in the ring",
Expand Down Expand Up @@ -362,6 +363,7 @@ func (r *Ring) updateRingState(ringDesc *Desc) {
r.ringTokensByZone = ringTokensByZone
r.ringInstanceByToken = ringInstanceByToken
r.ringInstanceIdByAddr = ringInstanceByAddr
r.previousRingZones = r.ringZones
r.ringZones = ringZones
r.lastTopologyChange = now
if r.shuffledSubringCache != nil {
Expand Down Expand Up @@ -665,12 +667,19 @@ func (r *Ring) updateRingMetrics(compareResult CompareResult) {
return
}

numByState := map[string]int{}
numByStateByZone := map[string]map[string]int{}
oldestTimestampByState := map[string]int64{}

// Initialized to zero so we emit zero-metrics (instead of not emitting anything)
for _, s := range []string{unhealthy, ACTIVE.String(), LEAVING.String(), PENDING.String(), JOINING.String(), READONLY.String()} {
numByState[s] = 0
numByStateByZone[s] = map[string]int{}
// Make sure zones that were removed from the ring get a zero value, so their gauges are reset to 0.
for _, zone := range r.previousRingZones {
numByStateByZone[s][zone] = 0
}
for _, zone := range r.ringZones {
numByStateByZone[s][zone] = 0
}
oldestTimestampByState[s] = 0
}

Expand All @@ -679,14 +688,19 @@ func (r *Ring) updateRingMetrics(compareResult CompareResult) {
if !r.IsHealthy(&instance, Reporting, r.KVClient.LastUpdateTime(r.key)) {
s = unhealthy
}
numByState[s]++
if _, ok := numByStateByZone[s]; !ok {
numByStateByZone[s] = map[string]int{}
}
numByStateByZone[s][instance.Zone]++
if oldestTimestampByState[s] == 0 || instance.Timestamp < oldestTimestampByState[s] {
oldestTimestampByState[s] = instance.Timestamp
}
}

for state, count := range numByState {
r.numMembersGaugeVec.WithLabelValues(state).Set(float64(count))
for state, zones := range numByStateByZone {
for zone, count := range zones {
r.numMembersGaugeVec.WithLabelValues(state, zone).Set(float64(count))
}
}
for state, timestamp := range oldestTimestampByState {
r.oldestTimestampGaugeVec.WithLabelValues(state).Set(float64(timestamp))
Expand Down
166 changes: 142 additions & 24 deletions pkg/ring/ring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3202,12 +3202,12 @@ func TestUpdateMetrics(t *testing.T) {
ring_member_ownership_percent{member="B",name="test"} 0.5000000002328306
# HELP ring_members Number of members in the ring
# TYPE ring_members gauge
ring_members{name="test",state="ACTIVE"} 2
ring_members{name="test",state="JOINING"} 0
ring_members{name="test",state="LEAVING"} 0
ring_members{name="test",state="PENDING"} 0
ring_members{name="test",state="READONLY"} 0
ring_members{name="test",state="Unhealthy"} 0
ring_members{name="test",state="ACTIVE",zone=""} 2
ring_members{name="test",state="JOINING",zone=""} 0
ring_members{name="test",state="LEAVING",zone=""} 0
ring_members{name="test",state="PENDING",zone=""} 0
ring_members{name="test",state="READONLY",zone=""} 0
ring_members{name="test",state="Unhealthy",zone=""} 0
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
# TYPE ring_oldest_member_timestamp gauge
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11
Expand All @@ -3230,12 +3230,12 @@ func TestUpdateMetrics(t *testing.T) {
Expected: `
# HELP ring_members Number of members in the ring
# TYPE ring_members gauge
ring_members{name="test",state="ACTIVE"} 2
ring_members{name="test",state="JOINING"} 0
ring_members{name="test",state="LEAVING"} 0
ring_members{name="test",state="PENDING"} 0
ring_members{name="test",state="READONLY"} 0
ring_members{name="test",state="Unhealthy"} 0
ring_members{name="test",state="ACTIVE",zone=""} 2
ring_members{name="test",state="JOINING",zone=""} 0
ring_members{name="test",state="LEAVING",zone=""} 0
ring_members{name="test",state="PENDING",zone=""} 0
ring_members{name="test",state="READONLY",zone=""} 0
ring_members{name="test",state="Unhealthy",zone=""} 0
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
# TYPE ring_oldest_member_timestamp gauge
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11
Expand Down Expand Up @@ -3310,12 +3310,12 @@ func TestUpdateMetricsWithRemoval(t *testing.T) {
ring_member_ownership_percent{member="B",name="test"} 0.5000000002328306
# HELP ring_members Number of members in the ring
# TYPE ring_members gauge
ring_members{name="test",state="ACTIVE"} 2
ring_members{name="test",state="JOINING"} 0
ring_members{name="test",state="LEAVING"} 0
ring_members{name="test",state="PENDING"} 0
ring_members{name="test",state="READONLY"} 0
ring_members{name="test",state="Unhealthy"} 0
ring_members{name="test",state="ACTIVE",zone=""} 2
ring_members{name="test",state="JOINING",zone=""} 0
ring_members{name="test",state="LEAVING",zone=""} 0
ring_members{name="test",state="PENDING",zone=""} 0
ring_members{name="test",state="READONLY",zone=""} 0
ring_members{name="test",state="Unhealthy",zone=""} 0
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
# TYPE ring_oldest_member_timestamp gauge
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11
Expand Down Expand Up @@ -3347,12 +3347,130 @@ func TestUpdateMetricsWithRemoval(t *testing.T) {
ring_member_ownership_percent{member="A",name="test"} 1
# HELP ring_members Number of members in the ring
# TYPE ring_members gauge
ring_members{name="test",state="ACTIVE"} 1
ring_members{name="test",state="JOINING"} 0
ring_members{name="test",state="LEAVING"} 0
ring_members{name="test",state="PENDING"} 0
ring_members{name="test",state="READONLY"} 0
ring_members{name="test",state="Unhealthy"} 0
ring_members{name="test",state="ACTIVE",zone=""} 1
ring_members{name="test",state="JOINING",zone=""} 0
ring_members{name="test",state="LEAVING",zone=""} 0
ring_members{name="test",state="PENDING",zone=""} 0
ring_members{name="test",state="READONLY",zone=""} 0
ring_members{name="test",state="Unhealthy",zone=""} 0
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
# TYPE ring_oldest_member_timestamp gauge
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 22
ring_oldest_member_timestamp{name="test",state="JOINING"} 0
ring_oldest_member_timestamp{name="test",state="LEAVING"} 0
ring_oldest_member_timestamp{name="test",state="PENDING"} 0
ring_oldest_member_timestamp{name="test",state="READONLY"} 0
ring_oldest_member_timestamp{name="test",state="Unhealthy"} 0
# HELP ring_tokens_owned The number of tokens in the ring owned by the member
# TYPE ring_tokens_owned gauge
ring_tokens_owned{member="A",name="test"} 2
# HELP ring_tokens_total Number of tokens in the ring
# TYPE ring_tokens_total gauge
ring_tokens_total{name="test"} 2
`))
assert.NoError(t, err)
}

func TestUpdateMetricsWithZone(t *testing.T) {
cfg := Config{
KVStore: kv.Config{},
HeartbeatTimeout: 0, // get healthy stats
ReplicationFactor: 3,
ZoneAwarenessEnabled: true,
DetailedMetricsEnabled: true,
}

registry := prometheus.NewRegistry()

// create the ring to set up metrics, but do not start
ring, err := NewWithStoreClientAndStrategy(cfg, testRingName, testRingKey, &MockClient{}, NewDefaultReplicationStrategy(), registry, log.NewNopLogger())
require.NoError(t, err)

ringDesc := Desc{
Ingesters: map[string]InstanceDesc{
"A": {Addr: "127.0.0.1", Timestamp: 22, Zone: "zone1", Tokens: []uint32{math.MaxUint32 / 6, (math.MaxUint32 / 6) * 4}},
"B": {Addr: "127.0.0.2", Timestamp: 11, Zone: "zone2", Tokens: []uint32{(math.MaxUint32 / 6) * 2, (math.MaxUint32 / 6) * 5}},
"C": {Addr: "127.0.0.3", Timestamp: 33, Zone: "zone3", Tokens: []uint32{(math.MaxUint32 / 6) * 3, math.MaxUint32}},
},
}
ring.updateRingState(&ringDesc)

err = testutil.GatherAndCompare(registry, bytes.NewBufferString(`
# HELP ring_member_ownership_percent The percent ownership of the ring by member
# TYPE ring_member_ownership_percent gauge
ring_member_ownership_percent{member="A",name="test"} 0.3333333332557231
ring_member_ownership_percent{member="B",name="test"} 0.3333333330228925
ring_member_ownership_percent{member="C",name="test"} 0.3333333337213844
# HELP ring_members Number of members in the ring
# TYPE ring_members gauge
ring_members{name="test",state="ACTIVE",zone="zone1"} 1
ring_members{name="test",state="ACTIVE",zone="zone2"} 1
ring_members{name="test",state="ACTIVE",zone="zone3"} 1
ring_members{name="test",state="JOINING",zone="zone1"} 0
ring_members{name="test",state="JOINING",zone="zone2"} 0
ring_members{name="test",state="JOINING",zone="zone3"} 0
ring_members{name="test",state="LEAVING",zone="zone1"} 0
ring_members{name="test",state="LEAVING",zone="zone2"} 0
ring_members{name="test",state="LEAVING",zone="zone3"} 0
ring_members{name="test",state="PENDING",zone="zone1"} 0
ring_members{name="test",state="PENDING",zone="zone2"} 0
ring_members{name="test",state="PENDING",zone="zone3"} 0
ring_members{name="test",state="READONLY",zone="zone1"} 0
ring_members{name="test",state="READONLY",zone="zone2"} 0
ring_members{name="test",state="READONLY",zone="zone3"} 0
ring_members{name="test",state="Unhealthy",zone="zone1"} 0
ring_members{name="test",state="Unhealthy",zone="zone2"} 0
ring_members{name="test",state="Unhealthy",zone="zone3"} 0
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
# TYPE ring_oldest_member_timestamp gauge
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11
ring_oldest_member_timestamp{name="test",state="JOINING"} 0
ring_oldest_member_timestamp{name="test",state="LEAVING"} 0
ring_oldest_member_timestamp{name="test",state="PENDING"} 0
ring_oldest_member_timestamp{name="test",state="READONLY"} 0
ring_oldest_member_timestamp{name="test",state="Unhealthy"} 0
# HELP ring_tokens_owned The number of tokens in the ring owned by the member
# TYPE ring_tokens_owned gauge
ring_tokens_owned{member="A",name="test"} 2
ring_tokens_owned{member="B",name="test"} 2
ring_tokens_owned{member="C",name="test"} 2
# HELP ring_tokens_total Number of tokens in the ring
# TYPE ring_tokens_total gauge
ring_tokens_total{name="test"} 6
`))
require.NoError(t, err)

ringDescNew := Desc{
Ingesters: map[string]InstanceDesc{
"A": {Addr: "127.0.0.1", Timestamp: 22, Zone: "zone1", Tokens: []uint32{math.MaxUint32 / 6, (math.MaxUint32 / 6) * 4}},
},
}
ring.updateRingState(&ringDescNew)

err = testutil.GatherAndCompare(registry, bytes.NewBufferString(`
# HELP ring_member_ownership_percent The percent ownership of the ring by member
# TYPE ring_member_ownership_percent gauge
ring_member_ownership_percent{member="A",name="test"} 1
# HELP ring_members Number of members in the ring
# TYPE ring_members gauge
ring_members{name="test",state="ACTIVE",zone="zone1"} 1
ring_members{name="test",state="ACTIVE",zone="zone2"} 0
ring_members{name="test",state="ACTIVE",zone="zone3"} 0
ring_members{name="test",state="JOINING",zone="zone1"} 0
ring_members{name="test",state="JOINING",zone="zone2"} 0
ring_members{name="test",state="JOINING",zone="zone3"} 0
ring_members{name="test",state="LEAVING",zone="zone1"} 0
ring_members{name="test",state="LEAVING",zone="zone2"} 0
ring_members{name="test",state="LEAVING",zone="zone3"} 0
ring_members{name="test",state="PENDING",zone="zone1"} 0
ring_members{name="test",state="PENDING",zone="zone2"} 0
ring_members{name="test",state="PENDING",zone="zone3"} 0
ring_members{name="test",state="READONLY",zone="zone1"} 0
ring_members{name="test",state="READONLY",zone="zone2"} 0
ring_members{name="test",state="READONLY",zone="zone3"} 0
ring_members{name="test",state="Unhealthy",zone="zone1"} 0
ring_members{name="test",state="Unhealthy",zone="zone2"} 0
ring_members{name="test",state="Unhealthy",zone="zone3"} 0
# HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring.
# TYPE ring_oldest_member_timestamp gauge
ring_oldest_member_timestamp{name="test",state="ACTIVE"} 22
Expand Down
Loading