-
Notifications
You must be signed in to change notification settings - Fork 146
refactor(metrics): use default prometheus go metrics collector, and implement standard Prometheus gauges for existing metrics #2219
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f27e746
48591bb
7f951a2
a361075
1608caf
7b789bf
7710c21
22154bf
51c847a
c440972
a1be7fe
461d1f4
ad69fb0
055cde1
66e6358
5eab11d
7e4fdf7
869d8a6
524569d
8b67688
3919e7e
d75311b
3f643c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,17 +11,17 @@ import ( | |
| "sync" | ||
| "time" | ||
|
|
||
| "github.com/ethereum/go-ethereum/metrics" | ||
| libp2pnetwork "github.com/libp2p/go-libp2p-core/network" | ||
| "github.com/libp2p/go-libp2p-core/peer" | ||
| "github.com/libp2p/go-libp2p-core/protocol" | ||
|
|
||
| gssmrmetrics "github.com/ChainSafe/gossamer/dot/metrics" | ||
| "github.com/ChainSafe/gossamer/dot/peerset" | ||
| "github.com/ChainSafe/gossamer/dot/telemetry" | ||
| "github.com/ChainSafe/gossamer/internal/log" | ||
| "github.com/ChainSafe/gossamer/internal/metrics" | ||
| "github.com/ChainSafe/gossamer/lib/common" | ||
| "github.com/ChainSafe/gossamer/lib/services" | ||
| libp2pnetwork "github.com/libp2p/go-libp2p-core/network" | ||
| "github.com/libp2p/go-libp2p-core/peer" | ||
| "github.com/libp2p/go-libp2p-core/protocol" | ||
| "github.com/prometheus/client_golang/prometheus" | ||
| "github.com/prometheus/client_golang/prometheus/promauto" | ||
| ) | ||
|
|
||
| const ( | ||
|
|
@@ -35,14 +35,58 @@ const ( | |
| transactionsID = "/transactions/1" | ||
|
|
||
| maxMessageSize = 1024 * 63 // 63kb for now | ||
|
|
||
| gssmrIsMajorSyncMetric = "gossamer/network/is_major_syncing" | ||
| ) | ||
|
|
||
| var ( | ||
| _ services.Service = &Service{} | ||
| logger = log.NewFromGlobal(log.AddContext("pkg", "network")) | ||
| maxReads = 256 | ||
|
|
||
| peerCountGauge = promauto.NewGauge(prometheus.GaugeOpts{ | ||
| Namespace: "gossamer_network_node", | ||
| Name: "peer_count_total", | ||
| Help: "total peer count", | ||
| }) | ||
| connectionsGauge = promauto.NewGauge(prometheus.GaugeOpts{ | ||
| Namespace: "gossamer_network_node", | ||
| Name: "connections_total", | ||
| Help: "total number of connections", | ||
| }) | ||
| nodeLatencyGauge = promauto.NewGauge(prometheus.GaugeOpts{ | ||
| Namespace: "gossamer_network_node", | ||
| Name: "latency_ms", | ||
| Help: "average node latency in milliseconds", | ||
| }) | ||
| inboundBlockAnnounceStreamsGauge = promauto.NewGauge(prometheus.GaugeOpts{ | ||
| Namespace: "gossamer_network_streams_block_announce", | ||
| Name: "inbound_total", | ||
| Help: "total number of inbound block announce streams", | ||
| }) | ||
| outboundBlockAnnounceStreamsGauge = promauto.NewGauge(prometheus.GaugeOpts{ | ||
| Namespace: "gossamer_network_streams_block_announce", | ||
| Name: "outbound_total", | ||
| Help: "total number of outbound block announce streams", | ||
| }) | ||
| inboundGrandpaStreamsGauge = promauto.NewGauge(prometheus.GaugeOpts{ | ||
| Namespace: "gossamer_network_streams_grandpa", | ||
| Name: "inbound_total", | ||
| Help: "total number of inbound grandpa streams", | ||
| }) | ||
| outboundGrandpaStreamsGauge = promauto.NewGauge(prometheus.GaugeOpts{ | ||
| Namespace: "gossamer_network_streams_grandpa", | ||
| Name: "outbound_total", | ||
| Help: "total number of outbound grandpa streams", | ||
| }) | ||
| inboundStreamsGauge = promauto.NewGauge(prometheus.GaugeOpts{ | ||
| Namespace: "gossamer_network_streams", | ||
| Name: "inbound_total", | ||
| Help: "total number of inbound streams", | ||
| }) | ||
| outboundStreamsGauge = promauto.NewGauge(prometheus.GaugeOpts{ | ||
| Namespace: "gossamer_network_streams", | ||
| Name: "outbound_total", | ||
| Help: "total number of outbound streams", | ||
| }) | ||
| ) | ||
|
|
||
| type ( | ||
|
|
@@ -83,6 +127,8 @@ type Service struct { | |
| noMDNS bool | ||
| noGossip bool // internal option | ||
|
|
||
| Metrics metrics.IntervalConfig | ||
|
|
||
| // telemetry | ||
| telemetryInterval time.Duration | ||
| closeCh chan struct{} | ||
|
|
@@ -166,6 +212,7 @@ func NewService(cfg *Config) (*Service, error) { | |
| streamManager: newStreamManager(ctx), | ||
| blockResponseBuf: make([]byte, maxBlockResponseSize), | ||
| telemetry: cfg.Telemetry, | ||
| Metrics: cfg.Metrics, | ||
| } | ||
|
|
||
| return network, err | ||
|
|
@@ -277,8 +324,8 @@ func (s *Service) Start() error { | |
|
|
||
| logger.Info("started network service with supported protocols " + strings.Join(s.host.protocols(), ", ")) | ||
|
|
||
| if s.cfg.PublishMetrics { | ||
| go s.collectNetworkMetrics() | ||
| if s.Metrics.Publish { | ||
| go s.updateMetrics() | ||
| } | ||
|
|
||
| go s.logPeerCount() | ||
|
|
@@ -289,44 +336,27 @@ func (s *Service) Start() error { | |
| return nil | ||
| } | ||
|
|
||
| func (s *Service) collectNetworkMetrics() { | ||
| func (s *Service) updateMetrics() { | ||
| ticker := time.NewTicker(s.Metrics.Interval) | ||
| defer ticker.Stop() | ||
| for { | ||
| peerCount := metrics.GetOrRegisterGauge("network/node/peerCount", metrics.DefaultRegistry) | ||
| totalConn := metrics.GetOrRegisterGauge("network/node/totalConnection", metrics.DefaultRegistry) | ||
| networkLatency := metrics.GetOrRegisterGauge("network/node/latency", metrics.DefaultRegistry) | ||
| syncedBlocks := metrics.GetOrRegisterGauge( | ||
| "service/blocks/sync", | ||
| metrics.DefaultRegistry) | ||
| numInboundBlockAnnounceStreams := metrics.GetOrRegisterGauge( | ||
| "network/streams/block_announce/inbound", | ||
| metrics.DefaultRegistry) | ||
| numOutboundBlockAnnounceStreams := metrics.GetOrRegisterGauge( | ||
| "network/streams/block_announce/outbound", | ||
| metrics.DefaultRegistry) | ||
| numInboundGrandpaStreams := metrics.GetOrRegisterGauge("network/streams/grandpa/inbound", metrics.DefaultRegistry) | ||
| numOutboundGrandpaStreams := metrics.GetOrRegisterGauge("network/streams/grandpa/outbound", metrics.DefaultRegistry) | ||
| totalInboundStreams := metrics.GetOrRegisterGauge("network/streams/total/inbound", metrics.DefaultRegistry) | ||
| totalOutboundStreams := metrics.GetOrRegisterGauge("network/streams/total/outbound", metrics.DefaultRegistry) | ||
|
|
||
| peerCount.Update(int64(s.host.peerCount())) | ||
| totalConn.Update(int64(len(s.host.h.Network().Conns()))) | ||
| networkLatency.Update(int64(s.host.h.Peerstore().LatencyEWMA(s.host.id()))) | ||
|
|
||
| numInboundBlockAnnounceStreams.Update(s.getNumStreams(BlockAnnounceMsgType, true)) | ||
| numOutboundBlockAnnounceStreams.Update(s.getNumStreams(BlockAnnounceMsgType, false)) | ||
| numInboundGrandpaStreams.Update(s.getNumStreams(ConsensusMsgType, true)) | ||
| numOutboundGrandpaStreams.Update(s.getNumStreams(ConsensusMsgType, false)) | ||
| totalInboundStreams.Update(s.getTotalStreams(true)) | ||
| totalOutboundStreams.Update(s.getTotalStreams(false)) | ||
|
|
||
| num, err := s.blockState.BestBlockNumber() | ||
| if err != nil { | ||
| syncedBlocks.Update(0) | ||
| } else { | ||
| syncedBlocks.Update(num.Int64()) | ||
| select { | ||
| case <-s.ctx.Done(): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe pass the context as the first argument to that function? Even if it means calling
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We have another issue to change the |
||
| return | ||
| case <-ticker.C: | ||
| peerCountGauge.Set(float64(s.host.peerCount())) | ||
| connectionsGauge.Set(float64(len(s.host.h.Network().Conns()))) | ||
| nodeLatencyGauge.Set(float64( | ||
| s.host.h.Peerstore().LatencyEWMA(s.host.id()).Milliseconds())) | ||
|
timwu20 marked this conversation as resolved.
|
||
| inboundBlockAnnounceStreamsGauge.Set(float64( | ||
| s.getNumStreams(BlockAnnounceMsgType, true))) | ||
| outboundBlockAnnounceStreamsGauge.Set(float64( | ||
| s.getNumStreams(BlockAnnounceMsgType, false))) | ||
| inboundGrandpaStreamsGauge.Set(float64(s.getNumStreams(ConsensusMsgType, true))) | ||
| outboundGrandpaStreamsGauge.Set(float64(s.getNumStreams(ConsensusMsgType, false))) | ||
| inboundStreamsGauge.Set(float64(s.getTotalStreams(true))) | ||
| outboundStreamsGauge.Set(float64(s.getTotalStreams(false))) | ||
|
qdm12 marked this conversation as resolved.
|
||
| } | ||
|
|
||
| time.Sleep(gssmrmetrics.RefreshInterval) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -615,18 +645,6 @@ func (s *Service) NodeRoles() byte { | |
| return s.cfg.Roles | ||
| } | ||
|
|
||
| // CollectGauge will be used to collect countable metrics from network service | ||
| func (s *Service) CollectGauge() map[string]int64 { | ||
| var isSynced int64 | ||
| if !s.syncer.IsSynced() { | ||
| isSynced = 1 | ||
| } | ||
|
|
||
| return map[string]int64{ | ||
| gssmrIsMajorSyncMetric: isSynced, | ||
| } | ||
| } | ||
|
|
||
| // HighestBlock returns the highest known block number | ||
| func (*Service) HighestBlock() int64 { | ||
| // TODO: refactor this to get the data from the sync service (#1857) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.