From 357086407d8884c48fe8b9aebd79b1d1b3b0d303 Mon Sep 17 00:00:00 2001 From: nolouch Date: Tue, 6 Mar 2018 11:20:27 +0800 Subject: [PATCH 1/3] server: change PD region label isolate level Histogram to Gauge --- server/metrics.go | 11 +++-- server/region_statistics.go | 69 ++++++++++++++++++++++++-------- server/region_statistics_test.go | 11 ++++- 3 files changed, 67 insertions(+), 24 deletions(-) diff --git a/server/metrics.go b/server/metrics.go index a06253aecf0..1af6e9f64ea 100644 --- a/server/metrics.go +++ b/server/metrics.go @@ -66,14 +66,13 @@ var ( Help: "Status of the regions.", }, []string{"type"}) - regionLabelLevelHistogram = prometheus.NewHistogram( - prometheus.HistogramOpts{ + regionLabelLevelGauge = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ Namespace: "pd", Subsystem: "regions", Name: "label_level", - Help: "Bucketed histogram of the label level of the region.", - Buckets: prometheus.LinearBuckets(0, 1, 8), - }) + Help: "Number of regions in the different label level.", + }, []string{"type"}) timeJumpBackCounter = prometheus.NewCounter( prometheus.CounterOpts{ @@ -155,6 +154,6 @@ func init() { prometheus.MustRegister(tsoCounter) prometheus.MustRegister(storeStatusGauge) prometheus.MustRegister(regionStatusGauge) - prometheus.MustRegister(regionLabelLevelHistogram) + prometheus.MustRegister(regionLabelLevelGauge) prometheus.MustRegister(metadataGauge) } diff --git a/server/region_statistics.go b/server/region_statistics.go index 9ec779f47ba..82d135fd32e 100644 --- a/server/region_statistics.go +++ b/server/region_statistics.go @@ -14,6 +14,8 @@ package server import ( + "fmt" + "github.com/pingcap/pd/server/core" "github.com/pingcap/pd/server/namespace" ) @@ -29,18 +31,20 @@ const ( ) type regionStatistics struct { - opt *scheduleOption - classifier namespace.Classifier - stats map[regionStatisticType]map[uint64]*core.RegionInfo - index map[uint64]regionStatisticType + opt *scheduleOption + classifier namespace.Classifier + stats map[regionStatisticType]map[uint64]*core.RegionInfo + index map[uint64]regionStatisticType + labelLevelStats *labelLevelStatistics } func newRegionStatistics(opt *scheduleOption, classifier namespace.Classifier) *regionStatistics { r := ®ionStatistics{ - opt: opt, - classifier: classifier, - stats: make(map[regionStatisticType]map[uint64]*core.RegionInfo), - index: make(map[uint64]regionStatisticType), + opt: opt, + classifier: classifier, + stats: make(map[regionStatisticType]map[uint64]*core.RegionInfo), + index: make(map[uint64]regionStatisticType), + labelLevelStats: newLabelLevlStatistics(), } r.stats[missPeer] = make(map[uint64]*core.RegionInfo) r.stats[extraPeer] = make(map[uint64]*core.RegionInfo) @@ -107,8 +111,47 @@ func (r *regionStatistics) Observe(region *core.RegionInfo, stores []*core.Store if len(stores) == 0 { return } + r.labelLevelStats.Observe(region, stores, labels) +} + +func (r *regionStatistics) Collect() { + regionStatusGauge.WithLabelValues("miss_peer_region_count").Set(float64(len(r.stats[missPeer]))) + regionStatusGauge.WithLabelValues("extra_peer_region_count").Set(float64(len(r.stats[extraPeer]))) + regionStatusGauge.WithLabelValues("down_peer_region_count").Set(float64(len(r.stats[downPeer]))) + regionStatusGauge.WithLabelValues("pending_peer_region_count").Set(float64(len(r.stats[pendingPeer]))) + regionStatusGauge.WithLabelValues("incorrect_namespace_region_count").Set(float64(len(r.stats[incorrectNamespace]))) + r.labelLevelStats.Collect() +} + +type labelLevelStatistics struct { + regionLabelLevelStats map[uint64]int + labelLevelCounter map[int]int +} + +func newLabelLevlStatistics() *labelLevelStatistics { + return &labelLevelStatistics{ + regionLabelLevelStats: make(map[uint64]int), + labelLevelCounter: make(map[int]int), + } +} + +func (l *labelLevelStatistics) Observe(region *core.RegionInfo, stores []*core.StoreInfo, labels []string) { + regionID := region.GetId() regionLabelLevel := getRegionLabelIsolationLevel(stores, labels) - regionLabelLevelHistogram.Observe(float64(regionLabelLevel)) + if level, ok := l.regionLabelLevelStats[regionID]; ok { + if level == regionLabelLevel { + return + } + l.labelLevelCounter[level]-- + } + l.regionLabelLevelStats[regionID] = regionLabelLevel + l.labelLevelCounter[regionLabelLevel]++ +} +func (l *labelLevelStatistics) Collect() { + for level, count := range l.labelLevelCounter { + typ := fmt.Sprintf("level_%d", level) + regionStatusGauge.WithLabelValues(typ).Set(float64(count)) + } } func getRegionLabelIsolationLevel(stores []*core.StoreInfo, labels []string) int { @@ -149,11 +192,3 @@ func notIsolatedStoresWithLabel(stores []*core.StoreInfo, label string) [][]*cor } return res } - -func (r *regionStatistics) Collect() { - regionStatusGauge.WithLabelValues("miss_peer_region_count").Set(float64(len(r.stats[missPeer]))) - regionStatusGauge.WithLabelValues("extra_peer_region_count").Set(float64(len(r.stats[extraPeer]))) - regionStatusGauge.WithLabelValues("down_peer_region_count").Set(float64(len(r.stats[downPeer]))) - regionStatusGauge.WithLabelValues("pending_peer_region_count").Set(float64(len(r.stats[pendingPeer]))) - regionStatusGauge.WithLabelValues("incorrect_namespace_region_count").Set(float64(len(r.stats[incorrectNamespace]))) -} diff --git a/server/region_statistics_test.go b/server/region_statistics_test.go index 247f2cb0b52..901d54b088b 100644 --- a/server/region_statistics_test.go +++ b/server/region_statistics_test.go @@ -95,6 +95,7 @@ func (t *testRegionStatistcs) TestRegionStatistics(c *C) { } func (t *testRegionStatistcs) TestRegionLabelIsolationLevel(c *C) { + labelLevelStats := newLabelLevlStatistics() labelsSet := [][]map[string]string{ { {"zone": "z1", "rack": "r1", "host": "h1"}, @@ -123,6 +124,8 @@ func (t *testRegionStatistcs) TestRegionLabelIsolationLevel(c *C) { }, } res := []int{2, 3, 1, 2, 0} + counter := []int{1, 1, 2, 1, 0} + regionID := 1 f := func(labels []map[string]string, res int) { metaStores := []*metapb.Store{ {Id: 1, Address: "mock://tikv-1"}, @@ -137,14 +140,20 @@ func (t *testRegionStatistcs) TestRegionLabelIsolationLevel(c *C) { } stores = append(stores, s) } + region := core.NewRegionInfo(&metapb.Region{Id: uint64(regionID)}, nil) level := getRegionLabelIsolationLevel(stores, []string{"zone", "rack", "host"}) + labelLevelStats.Observe(region, stores, []string{"zone", "rack", "host"}) c.Assert(level, Equals, res) + regionID++ } for i, labels := range labelsSet { f(labels, res[i]) - } + for i, res := range counter { + c.Assert(labelLevelStats.labelLevelCounter[i], Equals, res) + } + level := getRegionLabelIsolationLevel(nil, []string{"zone", "rack", "host"}) c.Assert(level, Equals, 0) level = getRegionLabelIsolationLevel(nil, nil) From a0b7141701f57eb3aed4c9b3fbdb0b17690b1eb6 Mon Sep 17 00:00:00 2001 From: nolouch Date: Wed, 7 Mar 2018 15:54:25 +0800 Subject: [PATCH 2/3] address comments --- server/cache.go | 5 +++++ server/region_statistics.go | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/server/cache.go b/server/cache.go index 23d82abeb5d..c0616b8d0ac 100644 --- a/server/cache.go +++ b/server/cache.go @@ -470,6 +470,11 @@ func (c *clusterInfo) handleRegionHeartbeat(region *core.RegionInfo) error { } } } + if c.regionStats != nil { + for _, item := range overlaps { + c.regionStats.clearDefunctRegion(item.GetId()) + } + } // Update related stores. if origin != nil { diff --git a/server/region_statistics.go b/server/region_statistics.go index 82d135fd32e..934d6a8ce1a 100644 --- a/server/region_statistics.go +++ b/server/region_statistics.go @@ -114,6 +114,13 @@ func (r *regionStatistics) Observe(region *core.RegionInfo, stores []*core.Store r.labelLevelStats.Observe(region, stores, labels) } +func (r *regionStatistics) clearDefunctRegion(regionID uint64) { + if oldIndex, ok := r.index[regionID]; ok { + r.deleteEntry(oldIndex, regionID) + } + r.labelLevelStats.clearDefunctRegion(regionID) +} + func (r *regionStatistics) Collect() { regionStatusGauge.WithLabelValues("miss_peer_region_count").Set(float64(len(r.stats[missPeer]))) regionStatusGauge.WithLabelValues("extra_peer_region_count").Set(float64(len(r.stats[extraPeer]))) @@ -147,6 +154,7 @@ func (l *labelLevelStatistics) Observe(region *core.RegionInfo, stores []*core.S l.regionLabelLevelStats[regionID] = regionLabelLevel l.labelLevelCounter[regionLabelLevel]++ } + func (l *labelLevelStatistics) Collect() { for level, count := range l.labelLevelCounter { typ := fmt.Sprintf("level_%d", level) @@ -154,6 +162,13 @@ func (l *labelLevelStatistics) Collect() { } } +func (l *labelLevelStatistics) clearDefunctRegion(regionID uint64) { + if level, ok := l.regionLabelLevelStats[regionID]; ok { + l.labelLevelCounter[level]-- + delete(l.regionLabelLevelStats, regionID) + } +} + func getRegionLabelIsolationLevel(stores []*core.StoreInfo, labels []string) int { if len(stores) == 0 || len(labels) == 0 { return 0 From fc72631e5502baff95645b02426c43c992de395a Mon Sep 17 00:00:00 2001 From: nolouch Date: Mon, 12 Mar 2018 17:23:05 +0800 Subject: [PATCH 3/3] use backgroud with label level statistic --- server/cache.go | 38 +++++++++++++++++++++----------- server/coordinator.go | 2 ++ server/region_statistics.go | 28 +++++++++-------------- server/region_statistics_test.go | 8 +++---- 4 files changed, 41 insertions(+), 35 deletions(-) diff --git a/server/cache.go b/server/cache.go index c0616b8d0ac..c78b983307c 100644 --- a/server/cache.go +++ b/server/cache.go @@ -40,20 +40,22 @@ type clusterInfo struct { sync.RWMutex *schedule.BasicCluster - id core.IDAllocator - kv *core.KV - meta *metapb.Cluster - activeRegions int - opt *scheduleOption - regionStats *regionStatistics + id core.IDAllocator + kv *core.KV + meta *metapb.Cluster + activeRegions int + opt *scheduleOption + regionStats *regionStatistics + labelLevelStats *labelLevelStatistics } func newClusterInfo(id core.IDAllocator, opt *scheduleOption, kv *core.KV) *clusterInfo { return &clusterInfo{ - BasicCluster: schedule.NewBasicCluster(), - id: id, - opt: opt, - kv: kv, + BasicCluster: schedule.NewBasicCluster(), + id: id, + opt: opt, + kv: kv, + labelLevelStats: newLabelLevelStatistics(), } } @@ -470,10 +472,11 @@ func (c *clusterInfo) handleRegionHeartbeat(region *core.RegionInfo) error { } } } - if c.regionStats != nil { - for _, item := range overlaps { + for _, item := range overlaps { + if c.regionStats != nil { c.regionStats.clearDefunctRegion(item.GetId()) } + c.labelLevelStats.clearDefunctRegion(item.GetId()) } // Update related stores. @@ -488,7 +491,7 @@ func (c *clusterInfo) handleRegionHeartbeat(region *core.RegionInfo) error { } if c.regionStats != nil { - c.regionStats.Observe(region, c.getRegionStores(region), c.GetLocationLabels()) + c.regionStats.Observe(region, c.getRegionStores(region)) } key := region.GetId() @@ -509,6 +512,14 @@ func (c *clusterInfo) handleRegionHeartbeat(region *core.RegionInfo) error { return nil } +func (c *clusterInfo) updateRegionsLabelLevelStats(regions []*core.RegionInfo) { + c.Lock() + defer c.Unlock() + for _, region := range regions { + c.labelLevelStats.Observe(region, c.getRegionStores(region), c.GetLocationLabels()) + } +} + func (c *clusterInfo) collectMetrics() { if c.regionStats == nil { return @@ -516,6 +527,7 @@ func (c *clusterInfo) collectMetrics() { c.RLock() defer c.RUnlock() c.regionStats.Collect() + c.labelLevelStats.Collect() } func (c *clusterInfo) GetRegionStatsByType(typ regionStatisticType) []*core.RegionInfo { diff --git a/server/coordinator.go b/server/coordinator.go index 4988ae3bb93..124c2a60eff 100644 --- a/server/coordinator.go +++ b/server/coordinator.go @@ -150,6 +150,8 @@ func (c *coordinator) patrolRegions() { break } } + // update label level isolation statistics. + c.cluster.updateRegionsLabelLevelStats(regions) } } diff --git a/server/region_statistics.go b/server/region_statistics.go index 934d6a8ce1a..01f1d67752c 100644 --- a/server/region_statistics.go +++ b/server/region_statistics.go @@ -31,20 +31,18 @@ const ( ) type regionStatistics struct { - opt *scheduleOption - classifier namespace.Classifier - stats map[regionStatisticType]map[uint64]*core.RegionInfo - index map[uint64]regionStatisticType - labelLevelStats *labelLevelStatistics + opt *scheduleOption + classifier namespace.Classifier + stats map[regionStatisticType]map[uint64]*core.RegionInfo + index map[uint64]regionStatisticType } func newRegionStatistics(opt *scheduleOption, classifier namespace.Classifier) *regionStatistics { r := ®ionStatistics{ - opt: opt, - classifier: classifier, - stats: make(map[regionStatisticType]map[uint64]*core.RegionInfo), - index: make(map[uint64]regionStatisticType), - labelLevelStats: newLabelLevlStatistics(), + opt: opt, + classifier: classifier, + stats: make(map[regionStatisticType]map[uint64]*core.RegionInfo), + index: make(map[uint64]regionStatisticType), } r.stats[missPeer] = make(map[uint64]*core.RegionInfo) r.stats[extraPeer] = make(map[uint64]*core.RegionInfo) @@ -70,7 +68,7 @@ func (r *regionStatistics) deleteEntry(deleteIndex regionStatisticType, regionID } } -func (r *regionStatistics) Observe(region *core.RegionInfo, stores []*core.StoreInfo, labels []string) { +func (r *regionStatistics) Observe(region *core.RegionInfo, stores []*core.StoreInfo) { // Region state. regionID := region.GetId() namespace := r.classifier.GetRegionNamespace(region) @@ -108,17 +106,12 @@ func (r *regionStatistics) Observe(region *core.RegionInfo, stores []*core.Store } r.deleteEntry(deleteIndex, regionID) r.index[regionID] = peerTypeIndex - if len(stores) == 0 { - return - } - r.labelLevelStats.Observe(region, stores, labels) } func (r *regionStatistics) clearDefunctRegion(regionID uint64) { if oldIndex, ok := r.index[regionID]; ok { r.deleteEntry(oldIndex, regionID) } - r.labelLevelStats.clearDefunctRegion(regionID) } func (r *regionStatistics) Collect() { @@ -127,7 +120,6 @@ func (r *regionStatistics) Collect() { regionStatusGauge.WithLabelValues("down_peer_region_count").Set(float64(len(r.stats[downPeer]))) regionStatusGauge.WithLabelValues("pending_peer_region_count").Set(float64(len(r.stats[pendingPeer]))) regionStatusGauge.WithLabelValues("incorrect_namespace_region_count").Set(float64(len(r.stats[incorrectNamespace]))) - r.labelLevelStats.Collect() } type labelLevelStatistics struct { @@ -135,7 +127,7 @@ type labelLevelStatistics struct { labelLevelCounter map[int]int } -func newLabelLevlStatistics() *labelLevelStatistics { +func newLabelLevelStatistics() *labelLevelStatistics { return &labelLevelStatistics{ regionLabelLevelStats: make(map[uint64]int), labelLevelCounter: make(map[int]int), diff --git a/server/region_statistics_test.go b/server/region_statistics_test.go index 901d54b088b..257e36418da 100644 --- a/server/region_statistics_test.go +++ b/server/region_statistics_test.go @@ -75,18 +75,18 @@ func (t *testRegionStatistcs) TestRegionStatistics(c *C) { region1 := core.NewRegionInfo(r1, peers[0]) region2 := core.NewRegionInfo(r2, peers[0]) regionStats := newRegionStatistics(opt, mockClassifier{}) - regionStats.Observe(region1, stores, nil) + regionStats.Observe(region1, stores) c.Assert(len(regionStats.stats[extraPeer]), Equals, 1) region1.DownPeers = downPeers region1.PendingPeers = peers[0:1] - regionStats.Observe(region1, stores, nil) + regionStats.Observe(region1, stores) c.Assert(len(regionStats.stats[extraPeer]), Equals, 1) c.Assert(len(regionStats.stats[missPeer]), Equals, 0) c.Assert(len(regionStats.stats[downPeer]), Equals, 1) c.Assert(len(regionStats.stats[pendingPeer]), Equals, 1) c.Assert(len(regionStats.stats[incorrectNamespace]), Equals, 1) region2.DownPeers = downPeers[0:1] - regionStats.Observe(region2, stores[0:2], nil) + regionStats.Observe(region2, stores[0:2]) c.Assert(len(regionStats.stats[extraPeer]), Equals, 1) c.Assert(len(regionStats.stats[missPeer]), Equals, 1) c.Assert(len(regionStats.stats[downPeer]), Equals, 2) @@ -95,7 +95,7 @@ func (t *testRegionStatistcs) TestRegionStatistics(c *C) { } func (t *testRegionStatistcs) TestRegionLabelIsolationLevel(c *C) { - labelLevelStats := newLabelLevlStatistics() + labelLevelStats := newLabelLevelStatistics() labelsSet := [][]map[string]string{ { {"zone": "z1", "rack": "r1", "host": "h1"},