diff --git a/server/cache.go b/server/cache.go index 23d82abeb5d..c78b983307c 100644 --- a/server/cache.go +++ b/server/cache.go @@ -40,20 +40,22 @@ type clusterInfo struct { sync.RWMutex *schedule.BasicCluster - id core.IDAllocator - kv *core.KV - meta *metapb.Cluster - activeRegions int - opt *scheduleOption - regionStats *regionStatistics + id core.IDAllocator + kv *core.KV + meta *metapb.Cluster + activeRegions int + opt *scheduleOption + regionStats *regionStatistics + labelLevelStats *labelLevelStatistics } func newClusterInfo(id core.IDAllocator, opt *scheduleOption, kv *core.KV) *clusterInfo { return &clusterInfo{ - BasicCluster: schedule.NewBasicCluster(), - id: id, - opt: opt, - kv: kv, + BasicCluster: schedule.NewBasicCluster(), + id: id, + opt: opt, + kv: kv, + labelLevelStats: newLabelLevelStatistics(), } } @@ -470,6 +472,12 @@ func (c *clusterInfo) handleRegionHeartbeat(region *core.RegionInfo) error { } } } + for _, item := range overlaps { + if c.regionStats != nil { + c.regionStats.clearDefunctRegion(item.GetId()) + } + c.labelLevelStats.clearDefunctRegion(item.GetId()) + } // Update related stores. if origin != nil { @@ -483,7 +491,7 @@ func (c *clusterInfo) handleRegionHeartbeat(region *core.RegionInfo) error { } if c.regionStats != nil { - c.regionStats.Observe(region, c.getRegionStores(region), c.GetLocationLabels()) + c.regionStats.Observe(region, c.getRegionStores(region)) } key := region.GetId() @@ -504,6 +512,14 @@ func (c *clusterInfo) handleRegionHeartbeat(region *core.RegionInfo) error { return nil } +func (c *clusterInfo) updateRegionsLabelLevelStats(regions []*core.RegionInfo) { + c.Lock() + defer c.Unlock() + for _, region := range regions { + c.labelLevelStats.Observe(region, c.getRegionStores(region), c.GetLocationLabels()) + } +} + func (c *clusterInfo) collectMetrics() { if c.regionStats == nil { return @@ -511,6 +527,7 @@ func (c *clusterInfo) collectMetrics() { c.RLock() defer c.RUnlock() c.regionStats.Collect() + c.labelLevelStats.Collect() } func (c *clusterInfo) GetRegionStatsByType(typ regionStatisticType) []*core.RegionInfo { diff --git a/server/coordinator.go b/server/coordinator.go index 4988ae3bb93..124c2a60eff 100644 --- a/server/coordinator.go +++ b/server/coordinator.go @@ -150,6 +150,8 @@ func (c *coordinator) patrolRegions() { break } } + // update label level isolation statistics. + c.cluster.updateRegionsLabelLevelStats(regions) } } diff --git a/server/metrics.go b/server/metrics.go index 72fe6250ba6..d49c43e0e6e 100644 --- a/server/metrics.go +++ b/server/metrics.go @@ -74,14 +74,13 @@ var ( Help: "Status of the regions.", }, []string{"type"}) - regionLabelLevelHistogram = prometheus.NewHistogram( - prometheus.HistogramOpts{ + regionLabelLevelGauge = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ Namespace: "pd", Subsystem: "regions", Name: "label_level", - Help: "Bucketed histogram of the label level of the region.", - Buckets: prometheus.LinearBuckets(0, 1, 8), - }) + Help: "Number of regions in the different label level.", + }, []string{"type"}) timeJumpBackCounter = prometheus.NewCounter( prometheus.CounterOpts{ @@ -163,6 +162,6 @@ func init() { prometheus.MustRegister(tsoCounter) prometheus.MustRegister(storeStatusGauge) prometheus.MustRegister(regionStatusGauge) - prometheus.MustRegister(regionLabelLevelHistogram) + prometheus.MustRegister(regionLabelLevelGauge) prometheus.MustRegister(metadataGauge) } diff --git a/server/region_statistics.go b/server/region_statistics.go index b6b278a9063..e2df043bea2 100644 --- a/server/region_statistics.go +++ b/server/region_statistics.go @@ -14,6 +14,8 @@ package server import ( + "fmt" + "github.com/pingcap/pd/server/core" "github.com/pingcap/pd/server/namespace" ) @@ -68,7 +70,7 @@ func (r *regionStatistics) deleteEntry(deleteIndex regionStatisticType, regionID } } -func (r *regionStatistics) Observe(region *core.RegionInfo, stores []*core.StoreInfo, labels []string) { +func (r *regionStatistics) Observe(region *core.RegionInfo, stores []*core.StoreInfo) { // Region state. regionID := region.GetId() namespace := r.classifier.GetRegionNamespace(region) @@ -113,11 +115,60 @@ func (r *regionStatistics) Observe(region *core.RegionInfo, stores []*core.Store } r.deleteEntry(deleteIndex, regionID) r.index[regionID] = peerTypeIndex - if len(stores) == 0 { - return +} + +func (r *regionStatistics) clearDefunctRegion(regionID uint64) { + if oldIndex, ok := r.index[regionID]; ok { + r.deleteEntry(oldIndex, regionID) } +} + +func (r *regionStatistics) Collect() { + regionStatusGauge.WithLabelValues("miss_peer_region_count").Set(float64(len(r.stats[missPeer]))) + regionStatusGauge.WithLabelValues("extra_peer_region_count").Set(float64(len(r.stats[extraPeer]))) + regionStatusGauge.WithLabelValues("down_peer_region_count").Set(float64(len(r.stats[downPeer]))) + regionStatusGauge.WithLabelValues("pending_peer_region_count").Set(float64(len(r.stats[pendingPeer]))) + regionStatusGauge.WithLabelValues("offline_peer_region_count").Set(float64(len(r.stats[offlinePeer]))) + regionStatusGauge.WithLabelValues("incorrect_namespace_region_count").Set(float64(len(r.stats[incorrectNamespace]))) +} + +type labelLevelStatistics struct { + regionLabelLevelStats map[uint64]int + labelLevelCounter map[int]int +} + +func newLabelLevelStatistics() *labelLevelStatistics { + return &labelLevelStatistics{ + regionLabelLevelStats: make(map[uint64]int), + labelLevelCounter: make(map[int]int), + } +} + +func (l *labelLevelStatistics) Observe(region *core.RegionInfo, stores []*core.StoreInfo, labels []string) { + regionID := region.GetId() regionLabelLevel := getRegionLabelIsolationLevel(stores, labels) - regionLabelLevelHistogram.Observe(float64(regionLabelLevel)) + if level, ok := l.regionLabelLevelStats[regionID]; ok { + if level == regionLabelLevel { + return + } + l.labelLevelCounter[level]-- + } + l.regionLabelLevelStats[regionID] = regionLabelLevel + l.labelLevelCounter[regionLabelLevel]++ +} + +func (l *labelLevelStatistics) Collect() { + for level, count := range l.labelLevelCounter { + typ := fmt.Sprintf("level_%d", level) + regionStatusGauge.WithLabelValues(typ).Set(float64(count)) + } +} + +func (l *labelLevelStatistics) clearDefunctRegion(regionID uint64) { + if level, ok := l.regionLabelLevelStats[regionID]; ok { + l.labelLevelCounter[level]-- + delete(l.regionLabelLevelStats, regionID) + } } func getRegionLabelIsolationLevel(stores []*core.StoreInfo, labels []string) int { @@ -158,12 +209,3 @@ func notIsolatedStoresWithLabel(stores []*core.StoreInfo, label string) [][]*cor } return res } - -func (r *regionStatistics) Collect() { - regionStatusGauge.WithLabelValues("miss_peer_region_count").Set(float64(len(r.stats[missPeer]))) - regionStatusGauge.WithLabelValues("extra_peer_region_count").Set(float64(len(r.stats[extraPeer]))) - regionStatusGauge.WithLabelValues("down_peer_region_count").Set(float64(len(r.stats[downPeer]))) - regionStatusGauge.WithLabelValues("pending_peer_region_count").Set(float64(len(r.stats[pendingPeer]))) - regionStatusGauge.WithLabelValues("offline_peer_region_count").Set(float64(len(r.stats[offlinePeer]))) - regionStatusGauge.WithLabelValues("incorrect_namespace_region_count").Set(float64(len(r.stats[incorrectNamespace]))) -} diff --git a/server/region_statistics_test.go b/server/region_statistics_test.go index 9cf3dc37972..e460e3b502c 100644 --- a/server/region_statistics_test.go +++ b/server/region_statistics_test.go @@ -76,18 +76,18 @@ func (t *testRegionStatistcs) TestRegionStatistics(c *C) { region1 := core.NewRegionInfo(r1, peers[0]) region2 := core.NewRegionInfo(r2, peers[0]) regionStats := newRegionStatistics(opt, mockClassifier{}) - regionStats.Observe(region1, stores, nil) + regionStats.Observe(region1, stores) c.Assert(len(regionStats.stats[extraPeer]), Equals, 1) region1.DownPeers = downPeers region1.PendingPeers = peers[0:1] - regionStats.Observe(region1, stores, nil) + regionStats.Observe(region1, stores) c.Assert(len(regionStats.stats[extraPeer]), Equals, 1) c.Assert(len(regionStats.stats[missPeer]), Equals, 0) c.Assert(len(regionStats.stats[downPeer]), Equals, 1) c.Assert(len(regionStats.stats[pendingPeer]), Equals, 1) c.Assert(len(regionStats.stats[incorrectNamespace]), Equals, 1) region2.DownPeers = downPeers[0:1] - regionStats.Observe(region2, stores[0:2], nil) + regionStats.Observe(region2, stores[0:2]) c.Assert(len(regionStats.stats[extraPeer]), Equals, 1) c.Assert(len(regionStats.stats[missPeer]), Equals, 1) c.Assert(len(regionStats.stats[downPeer]), Equals, 2) @@ -95,11 +95,12 @@ func (t *testRegionStatistcs) TestRegionStatistics(c *C) { c.Assert(len(regionStats.stats[offlinePeer]), Equals, 1) c.Assert(len(regionStats.stats[incorrectNamespace]), Equals, 1) stores[3].State = metapb.StoreState_Up - regionStats.Observe(region1, stores, nil) + regionStats.Observe(region1, stores) c.Assert(len(regionStats.stats[offlinePeer]), Equals, 0) } func (t *testRegionStatistcs) TestRegionLabelIsolationLevel(c *C) { + labelLevelStats := newLabelLevelStatistics() labelsSet := [][]map[string]string{ { {"zone": "z1", "rack": "r1", "host": "h1"}, @@ -128,6 +129,8 @@ func (t *testRegionStatistcs) TestRegionLabelIsolationLevel(c *C) { }, } res := []int{2, 3, 1, 2, 0} + counter := []int{1, 1, 2, 1, 0} + regionID := 1 f := func(labels []map[string]string, res int) { metaStores := []*metapb.Store{ {Id: 1, Address: "mock://tikv-1"}, @@ -142,14 +145,20 @@ func (t *testRegionStatistcs) TestRegionLabelIsolationLevel(c *C) { } stores = append(stores, s) } + region := core.NewRegionInfo(&metapb.Region{Id: uint64(regionID)}, nil) level := getRegionLabelIsolationLevel(stores, []string{"zone", "rack", "host"}) + labelLevelStats.Observe(region, stores, []string{"zone", "rack", "host"}) c.Assert(level, Equals, res) + regionID++ } for i, labels := range labelsSet { f(labels, res[i]) - } + for i, res := range counter { + c.Assert(labelLevelStats.labelLevelCounter[i], Equals, res) + } + level := getRegionLabelIsolationLevel(nil, []string{"zone", "rack", "host"}) c.Assert(level, Equals, 0) level = getRegionLabelIsolationLevel(nil, nil)