diff --git a/CHANGELOG.md b/CHANGELOG.md
index 38a540e07a..f40b7f251d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -36,6 +36,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - **BREAKING:** Removed `evnode.v1.HealthService` gRPC endpoint. Use HTTP endpoints: `GET /health/live` and `GET /health/ready`. ([#2800](https://github.com/evstack/ev-node/pull/2800))
 - **BREAKING:** Removed `TrustedHash` configuration option and `--evnode.node.trusted_hash` flag. Sync service now automatically determines starting height from local store state ([#2838](https://github.com/evstack/ev-node/pull/2838))
+- **BREAKING:** Removed unused and confusing metrics from sequencers and block processing, including sequencer-specific metrics (gas price, blob size, transaction status, pending blocks), channel buffer metrics, overly granular error metrics, block production categorization metrics, and sync lag metrics. Essential metrics for DA submission health, block production, and performance monitoring are retained. ([#2904](https://github.com/evstack/ev-node/pull/2904))
 
 ### Fixed

diff --git a/apps/evm/cmd/run.go b/apps/evm/cmd/run.go
index 07ca32312b..eda6d3e1e5 100644
--- a/apps/evm/cmd/run.go
+++ b/apps/evm/cmd/run.go
@@ -111,11 +111,6 @@ func createSequencer(
 	nodeConfig config.Config,
 	genesis genesis.Genesis,
 ) (coresequencer.Sequencer, error) {
-	singleMetrics, err := single.NopMetrics()
-	if err != nil {
-		return nil, fmt.Errorf("failed to create single sequencer metrics: %w", err)
-	}
-
 	sequencer, err := single.NewSequencer(
 		ctx,
 		logger,
@@ -123,7 +118,6 @@ func createSequencer(
 		da,
 		[]byte(genesis.ChainID),
 		nodeConfig.Node.BlockTime.Duration,
-		singleMetrics,
 		nodeConfig.Node.Aggregator,
 	)
 	if err != nil {

diff --git a/apps/grpc/cmd/run.go b/apps/grpc/cmd/run.go
index d47d4b514c..5c35c5a185 100644
--- a/apps/grpc/cmd/run.go
+++ b/apps/grpc/cmd/run.go
@@ -119,11 +119,6 @@ func createSequencer(
 	nodeConfig config.Config,
 	genesis genesis.Genesis,
 ) (coresequencer.Sequencer, error) {
-	singleMetrics, err := single.NopMetrics()
-	if err != nil {
-		return nil, fmt.Errorf("failed to create single sequencer metrics: %w", err)
-	}
-
 	sequencer, err := single.NewSequencer(
 		ctx,
 		logger,
@@ -131,7 +126,6 @@ func createSequencer(
 		da,
 		[]byte(genesis.ChainID),
 		nodeConfig.Node.BlockTime.Duration,
-		singleMetrics,
 		nodeConfig.Node.Aggregator,
 	)
 	if err != nil {

diff --git a/apps/testapp/cmd/run.go b/apps/testapp/cmd/run.go
index 944be2b791..dc913da713 100644
--- a/apps/testapp/cmd/run.go
+++ b/apps/testapp/cmd/run.go
@@ -68,11 +68,6 @@ var RunCmd = &cobra.Command{
 			return err
 		}
 
-		singleMetrics, err := single.NopMetrics()
-		if err != nil {
-			return err
-		}
-
 		// Start the KV executor HTTP server
 		if kvEndpoint != "" { // Only start if endpoint is provided
 			httpServer := kvexecutor.NewHTTPServer(executor, kvEndpoint)
@@ -101,7 +96,6 @@ var RunCmd = &cobra.Command{
 			&daJrpc.DA,
 			[]byte(genesis.ChainID),
 			nodeConfig.Node.BlockTime.Duration,
-			singleMetrics,
 			nodeConfig.Node.Aggregator,
 		)
 		if err != nil {
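With the `single.Metrics` type gone, callers no longer construct or thread a metrics value through the sequencer. All three call sites above reduce to the same shape; a hedged sketch of the migrated wiring (identifiers such as `logger`, `db`, and `da` mirror the surrounding code, everything else is illustrative):

// Migration sketch, not code from this PR: the metrics argument is simply dropped.
sequencer, err := single.NewSequencer(
	ctx,
	logger,
	db,
	da,
	[]byte(genesis.ChainID),
	nodeConfig.Node.BlockTime.Duration,
	nodeConfig.Node.Aggregator,
)
if err != nil {
	return nil, fmt.Errorf("failed to create sequencer: %w", err)
}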
diff --git a/block/internal/common/metrics.go b/block/internal/common/metrics.go
index c543537cf5..2a4c821774 100644
--- a/block/internal/common/metrics.go
+++ b/block/internal/common/metrics.go
@@ -49,54 +49,22 @@ type Metrics struct {
 	BlockSizeBytes  metrics.Gauge // Size of the latest block
 	TotalTxs        metrics.Gauge // Total number of transactions
 	CommittedHeight metrics.Gauge `metrics_name:"latest_block_height"` // The latest block height
-
-	// Channel metrics
-	ChannelBufferUsage map[string]metrics.Gauge
-	DroppedSignals     metrics.Counter
-
-	// Error metrics
-	ErrorsByType         map[string]metrics.Counter
-	RecoverableErrors    metrics.Counter
-	NonRecoverableErrors metrics.Counter
+	TxsPerBlock     metrics.Histogram
 
 	// Performance metrics
 	OperationDuration map[string]metrics.Histogram
-	GoroutineCount    metrics.Gauge
 
 	// DA metrics
-	DASubmissionAttempts  metrics.Counter
-	DASubmissionSuccesses metrics.Counter
-	DASubmissionFailures  metrics.Counter
-	DARetrievalAttempts   metrics.Counter
-	DARetrievalSuccesses  metrics.Counter
-	DARetrievalFailures   metrics.Counter
-	DAInclusionHeight     metrics.Gauge
-	PendingHeadersCount   metrics.Gauge
-	PendingDataCount      metrics.Gauge
-
-	// Sync metrics
-	SyncLag             metrics.Gauge
-	HeadersSynced       metrics.Counter
-	DataSynced          metrics.Counter
-	BlocksApplied       metrics.Counter
-	InvalidHeadersCount metrics.Counter
-
-	// Block production metrics
-	BlockProductionTime  metrics.Histogram
-	EmptyBlocksProduced  metrics.Counter
-	LazyBlocksProduced   metrics.Counter
-	NormalBlocksProduced metrics.Counter
-	TxsPerBlock          metrics.Histogram
-
-	// State transition metrics
-	StateTransitions   map[string]metrics.Counter
-	InvalidTransitions metrics.Counter
-
-	// DA Submitter metrics
 	DASubmitterFailures     map[DASubmitterFailureReason]metrics.Counter // Counter with reason label
 	DASubmitterLastFailure  map[DASubmitterFailureReason]metrics.Gauge   // Timestamp gauge with reason label
 	DASubmitterPendingBlobs metrics.Gauge                                // Total number of blobs awaiting submission (backlog)
 	DASubmitterResends      metrics.Counter                              // Number of resend attempts
+	DARetrievalAttempts  metrics.Counter
+	DARetrievalSuccesses metrics.Counter
+	DARetrievalFailures  metrics.Counter
+	DAInclusionHeight    metrics.Gauge
+	PendingHeadersCount  metrics.Gauge
+	PendingDataCount     metrics.Gauge
 }
 
 // PrometheusMetrics returns Metrics built using Prometheus client library
@@ -107,10 +75,7 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
 	}
 
 	m := &Metrics{
-		ChannelBufferUsage:     make(map[string]metrics.Gauge),
-		ErrorsByType:           make(map[string]metrics.Counter),
 		OperationDuration:      make(map[string]metrics.Histogram),
-		StateTransitions:       make(map[string]metrics.Counter),
 		DASubmitterFailures:    make(map[DASubmitterFailureReason]metrics.Counter),
 		DASubmitterLastFailure: make(map[DASubmitterFailureReason]metrics.Gauge),
 	}
@@ -151,63 +116,12 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
 		Help: "The latest block height.",
 	}, labels).With(labelsAndValues...)
 
-	// Channel metrics
-	m.DroppedSignals = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "dropped_signals_total",
-		Help:      "Total number of dropped channel signals",
-	}, labels).With(labelsAndValues...)
-
-	// Initialize channel buffer usage gauges
-	channelNames := []string{"height_in", "header_store", "data_store", "retrieve", "da_includer", "tx_notify"}
-	for _, name := range channelNames {
-		m.ChannelBufferUsage[name] = prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
-			Namespace: namespace,
-			Subsystem: MetricsSubsystem,
-			Name:      "channel_buffer_usage",
-			Help:      "Current buffer usage of channels",
-			ConstLabels: map[string]string{
-				"channel": name,
-			},
-		}, labels).With(labelsAndValues...)
-	}
-
-	// Error metrics
-	m.RecoverableErrors = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "recoverable_errors_total",
-		Help:      "Total number of recoverable errors",
-	}, labels).With(labelsAndValues...)
-
-	m.NonRecoverableErrors = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "non_recoverable_errors_total",
-		Help:      "Total number of non-recoverable errors",
-	}, labels).With(labelsAndValues...)
-
-	// Initialize error type counters
-	errorTypes := []string{"block_production", "da_submission", "sync", "validation", "state_update"}
-	for _, errType := range errorTypes {
-		m.ErrorsByType[errType] = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-			Namespace: namespace,
-			Subsystem: MetricsSubsystem,
-			Name:      "errors_by_type_total",
-			Help:      "Total number of errors by type",
-			ConstLabels: map[string]string{
-				"error_type": errType,
-			},
-		}, labels).With(labelsAndValues...)
-	}
-
-	// Performance metrics
-	m.GoroutineCount = prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
+	m.TxsPerBlock = prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
 		Namespace: namespace,
 		Subsystem: MetricsSubsystem,
-		Name:      "goroutines_count",
-		Help:      "Current number of goroutines",
+		Name:      "txs_per_block",
+		Help:      "Number of transactions per block",
+		Buckets:   []float64{0, 1, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000},
 	}, labels).With(labelsAndValues...)
 
 	// Initialize operation duration histograms
@@ -226,27 +140,6 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
 	}
 
 	// DA metrics
-	m.DASubmissionAttempts = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "da_submission_attempts_total",
-		Help:      "Total number of DA submission attempts",
-	}, labels).With(labelsAndValues...)
-
-	m.DASubmissionSuccesses = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "da_submission_successes_total",
-		Help:      "Total number of successful DA submissions",
-	}, labels).With(labelsAndValues...)
-
-	m.DASubmissionFailures = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "da_submission_failures_total",
-		Help:      "Total number of failed DA submissions",
-	}, labels).With(labelsAndValues...)
-
 	m.DARetrievalAttempts = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
 		Namespace: namespace,
 		Subsystem: MetricsSubsystem,
@@ -289,102 +182,6 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
 		Help: "Number of data blocks pending DA submission",
 	}, labels).With(labelsAndValues...)
 
-	// Sync metrics
-	m.SyncLag = prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "sync_lag_blocks",
-		Help:      "Number of blocks behind the head",
-	}, labels).With(labelsAndValues...)
-
-	m.HeadersSynced = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "headers_synced_total",
-		Help:      "Total number of headers synced",
-	}, labels).With(labelsAndValues...)
-
-	m.DataSynced = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "data_synced_total",
-		Help:      "Total number of data blocks synced",
-	}, labels).With(labelsAndValues...)
-
-	m.BlocksApplied = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "blocks_applied_total",
-		Help:      "Total number of blocks applied to state",
-	}, labels).With(labelsAndValues...)
-
-	m.InvalidHeadersCount = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "invalid_headers_total",
-		Help:      "Total number of invalid headers rejected",
-	}, labels).With(labelsAndValues...)
-
-	// Block production metrics
-	m.BlockProductionTime = prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "block_production_duration_seconds",
-		Help:      "Time taken to produce a block",
-		Buckets:   []float64{.001, .005, .01, .025, .05, .1, .25, .5, 1},
-	}, labels).With(labelsAndValues...)
-
-	m.EmptyBlocksProduced = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "empty_blocks_produced_total",
-		Help:      "Total number of empty blocks produced",
-	}, labels).With(labelsAndValues...)
-
-	m.LazyBlocksProduced = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "lazy_blocks_produced_total",
-		Help:      "Total number of blocks produced in lazy mode",
-	}, labels).With(labelsAndValues...)
-
-	m.NormalBlocksProduced = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "normal_blocks_produced_total",
-		Help:      "Total number of blocks produced in normal mode",
-	}, labels).With(labelsAndValues...)
-
-	m.TxsPerBlock = prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "txs_per_block",
-		Help:      "Number of transactions per block",
-		Buckets:   []float64{0, 1, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000},
-	}, labels).With(labelsAndValues...)
-
-	// State transition metrics
-	m.InvalidTransitions = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-		Namespace: namespace,
-		Subsystem: MetricsSubsystem,
-		Name:      "invalid_state_transitions_total",
-		Help:      "Total number of invalid state transitions attempted",
-	}, labels).With(labelsAndValues...)
-
-	// Initialize state transition counters
-	transitions := []string{"pending_to_submitted", "submitted_to_included", "included_to_finalized"}
-	for _, transition := range transitions {
-		m.StateTransitions[transition] = prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-			Namespace: namespace,
-			Subsystem: MetricsSubsystem,
-			Name:      "state_transitions_total",
-			Help:      "Total number of state transitions",
-			ConstLabels: map[string]string{
-				"transition": transition,
-			},
-		}, labels).With(labelsAndValues...)
-	}
-
 	// DA Submitter metrics
 	m.DASubmitterPendingBlobs = prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
 		Namespace: namespace,
@@ -435,36 +232,16 @@ func NopMetrics() *Metrics {
 		BlockSizeBytes:  discard.NewGauge(),
 		TotalTxs:        discard.NewGauge(),
 		CommittedHeight: discard.NewGauge(),
+		TxsPerBlock:     discard.NewHistogram(),
 
 		// Extended metrics
-		ChannelBufferUsage:   make(map[string]metrics.Gauge),
-		ErrorsByType:         make(map[string]metrics.Counter),
 		OperationDuration:    make(map[string]metrics.Histogram),
-		StateTransitions:     make(map[string]metrics.Counter),
-		DroppedSignals:       discard.NewCounter(),
-		RecoverableErrors:    discard.NewCounter(),
-		NonRecoverableErrors: discard.NewCounter(),
-		GoroutineCount:       discard.NewGauge(),
-		DASubmissionAttempts:  discard.NewCounter(),
-		DASubmissionSuccesses: discard.NewCounter(),
-		DASubmissionFailures:  discard.NewCounter(),
 		DARetrievalAttempts:  discard.NewCounter(),
 		DARetrievalSuccesses: discard.NewCounter(),
 		DARetrievalFailures:  discard.NewCounter(),
 		DAInclusionHeight:    discard.NewGauge(),
 		PendingHeadersCount:  discard.NewGauge(),
 		PendingDataCount:     discard.NewGauge(),
-		SyncLag:              discard.NewGauge(),
-		HeadersSynced:        discard.NewCounter(),
-		DataSynced:           discard.NewCounter(),
-		BlocksApplied:        discard.NewCounter(),
-		InvalidHeadersCount:  discard.NewCounter(),
-		BlockProductionTime:  discard.NewHistogram(),
-		EmptyBlocksProduced:  discard.NewCounter(),
-		LazyBlocksProduced:   discard.NewCounter(),
-		NormalBlocksProduced: discard.NewCounter(),
-		TxsPerBlock:          discard.NewHistogram(),
-		InvalidTransitions:   discard.NewCounter(),
 		DASubmitterFailures:     make(map[DASubmitterFailureReason]metrics.Counter),
 		DASubmitterLastFailure:  make(map[DASubmitterFailureReason]metrics.Gauge),
 		DASubmitterPendingBlobs: discard.NewGauge(),
@@ -472,26 +249,11 @@ func NopMetrics() *Metrics {
 	}
 
 	// Initialize maps with no-op metrics
-	channelNames := []string{"height_in", "header_store", "data_store", "retrieve", "da_includer", "tx_notify"}
-	for _, name := range channelNames {
-		m.ChannelBufferUsage[name] = discard.NewGauge()
-	}
-
-	errorTypes := []string{"block_production", "da_submission", "sync", "validation", "state_update"}
-	for _, errType := range errorTypes {
-		m.ErrorsByType[errType] = discard.NewCounter()
-	}
-
 	operations := []string{"block_production", "da_submission", "block_retrieval", "block_validation", "state_update"}
 	for _, op := range operations {
 		m.OperationDuration[op] = discard.NewHistogram()
 	}
 
-	transitions := []string{"pending_to_submitted", "submitted_to_included", "included_to_finalized"}
-	for _, transition := range transitions {
-		m.StateTransitions[transition] = discard.NewCounter()
-	}
-
 	// Initialize DA submitter failure maps with no-op metrics
 	for _, reason := range AllDASubmitterFailureReasons() {
 		m.DASubmitterFailures[reason] = discard.NewCounter()
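After the trim, `common.Metrics` keeps four groups: the core block gauges, the `OperationDuration` histograms, DA retrieval/inclusion metrics, and the DA submitter failure maps. A hedged usage sketch, written as if from a sibling `block/internal` package; the `"evnode"` namespace and the `chain_id` label value are assumptions, only the field names come from the struct above:

// Sketch only, not code from this PR.
m := common.PrometheusMetrics("evnode", "chain_id", "my-chain")
m.Height.Set(42)
m.TxsPerBlock.Observe(17)                              // new histogram
m.OperationDuration["block_production"].Observe(0.012) // seconds
m.DARetrievalAttempts.Add(1)
m.DAInclusionHeight.Set(40)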
diff --git a/block/internal/common/metrics_test.go b/block/internal/common/metrics_test.go
index da9a289152..c72c481b39 100644
--- a/block/internal/common/metrics_test.go
+++ b/block/internal/common/metrics_test.go
@@ -16,23 +16,7 @@ func TestMetrics(t *testing.T) {
 		assert.NotNil(t, em.BlockSizeBytes)
 		assert.NotNil(t, em.TotalTxs)
 		assert.NotNil(t, em.CommittedHeight)
-
-		// Test channel metrics initialization
-		channelNames := []string{"height_in", "header_store", "data_store", "retrieve", "da_includer", "tx_notify"}
-		assert.Len(t, em.ChannelBufferUsage, len(channelNames))
-		for _, name := range channelNames {
-			assert.NotNil(t, em.ChannelBufferUsage[name])
-		}
-		assert.NotNil(t, em.DroppedSignals)
-
-		// Test error metrics initialization
-		errorTypes := []string{"block_production", "da_submission", "sync", "validation", "state_update"}
-		assert.Len(t, em.ErrorsByType, len(errorTypes))
-		for _, errType := range errorTypes {
-			assert.NotNil(t, em.ErrorsByType[errType])
-		}
-		assert.NotNil(t, em.RecoverableErrors)
-		assert.NotNil(t, em.NonRecoverableErrors)
+		assert.NotNil(t, em.TxsPerBlock)
 
 		// Test performance metrics initialization
 		operations := []string{"block_production", "da_submission", "block_retrieval", "block_validation", "state_update"}
@@ -40,40 +24,14 @@ func TestMetrics(t *testing.T) {
 		for _, op := range operations {
 			assert.NotNil(t, em.OperationDuration[op])
 		}
-		assert.NotNil(t, em.GoroutineCount)
 
 		// Test DA metrics initialization
-		assert.NotNil(t, em.DASubmissionAttempts)
-		assert.NotNil(t, em.DASubmissionSuccesses)
-		assert.NotNil(t, em.DASubmissionFailures)
 		assert.NotNil(t, em.DARetrievalAttempts)
 		assert.NotNil(t, em.DARetrievalSuccesses)
 		assert.NotNil(t, em.DARetrievalFailures)
 		assert.NotNil(t, em.DAInclusionHeight)
 		assert.NotNil(t, em.PendingHeadersCount)
 		assert.NotNil(t, em.PendingDataCount)
-
-		// Test sync metrics initialization
-		assert.NotNil(t, em.SyncLag)
-		assert.NotNil(t, em.HeadersSynced)
-		assert.NotNil(t, em.DataSynced)
-		assert.NotNil(t, em.BlocksApplied)
-		assert.NotNil(t, em.InvalidHeadersCount)
-
-		// Test block production metrics initialization
-		assert.NotNil(t, em.BlockProductionTime)
-		assert.NotNil(t, em.EmptyBlocksProduced)
-		assert.NotNil(t, em.LazyBlocksProduced)
-		assert.NotNil(t, em.NormalBlocksProduced)
-		assert.NotNil(t, em.TxsPerBlock)
-
-		// Test state transition metrics initialization
-		transitions := []string{"pending_to_submitted", "submitted_to_included", "included_to_finalized"}
-		assert.Len(t, em.StateTransitions, len(transitions))
-		for _, transition := range transitions {
-			assert.NotNil(t, em.StateTransitions[transition])
-		}
-		assert.NotNil(t, em.InvalidTransitions)
 	})
 
 	t.Run("NopMetrics", func(t *testing.T) {
@@ -85,25 +43,7 @@ func TestMetrics(t *testing.T) {
 		assert.NotNil(t, em.BlockSizeBytes)
 		assert.NotNil(t, em.TotalTxs)
 		assert.NotNil(t, em.CommittedHeight)
-
-		// Test extended metrics
-		assert.NotNil(t, em.DroppedSignals)
-		assert.NotNil(t, em.RecoverableErrors)
-		assert.NotNil(t, em.NonRecoverableErrors)
-		assert.NotNil(t, em.GoroutineCount)
-
-		// Test maps are initialized
-		channelNames := []string{"height_in", "header_store", "data_store", "retrieve", "da_includer", "tx_notify"}
-		assert.Len(t, em.ChannelBufferUsage, len(channelNames))
-		for _, name := range channelNames {
-			assert.NotNil(t, em.ChannelBufferUsage[name])
-		}
-
-		errorTypes := []string{"block_production", "da_submission", "sync", "validation", "state_update"}
-		assert.Len(t, em.ErrorsByType, len(errorTypes))
-		for _, errType := range errorTypes {
-			assert.NotNil(t, em.ErrorsByType[errType])
-		}
+		assert.NotNil(t, em.TxsPerBlock)
 
 		operations := []string{"block_production", "da_submission", "block_retrieval", "block_validation", "state_update"}
 		assert.Len(t, em.OperationDuration, len(operations))
@@ -111,16 +51,7 @@ func TestMetrics(t *testing.T) {
 			assert.NotNil(t, em.OperationDuration[op])
 		}
 
-		transitions := []string{"pending_to_submitted", "submitted_to_included", "included_to_finalized"}
-		assert.Len(t, em.StateTransitions, len(transitions))
-		for _, transition := range transitions {
-			assert.NotNil(t, em.StateTransitions[transition])
-		}
-
 		// Test DA metrics
-		assert.NotNil(t, em.DASubmissionAttempts)
-		assert.NotNil(t, em.DASubmissionSuccesses)
-		assert.NotNil(t, em.DASubmissionFailures)
 		assert.NotNil(t, em.DARetrievalAttempts)
 		assert.NotNil(t, em.DARetrievalSuccesses)
 		assert.NotNil(t, em.DARetrievalFailures)
@@ -128,31 +59,9 @@ func TestMetrics(t *testing.T) {
 		assert.NotNil(t, em.PendingHeadersCount)
 		assert.NotNil(t, em.PendingDataCount)
 
-		// Test sync metrics
-		assert.NotNil(t, em.SyncLag)
-		assert.NotNil(t, em.HeadersSynced)
-		assert.NotNil(t, em.DataSynced)
-		assert.NotNil(t, em.BlocksApplied)
-		assert.NotNil(t, em.InvalidHeadersCount)
-
-		// Test block production metrics
-		assert.NotNil(t, em.BlockProductionTime)
-		assert.NotNil(t, em.EmptyBlocksProduced)
-		assert.NotNil(t, em.LazyBlocksProduced)
-		assert.NotNil(t, em.NormalBlocksProduced)
-		assert.NotNil(t, em.TxsPerBlock)
-		assert.NotNil(t, em.InvalidTransitions)
-
 		// Verify no-op metrics don't panic when used
 		em.Height.Set(100)
-		em.DroppedSignals.Add(1)
-		em.RecoverableErrors.Add(1)
-		em.GoroutineCount.Set(100)
-		em.BlockProductionTime.Observe(0.5)
-		em.ChannelBufferUsage["height_in"].Set(5)
-		em.ErrorsByType["block_production"].Add(1)
 		em.OperationDuration["block_production"].Observe(0.1)
-		em.StateTransitions["pending_to_submitted"].Add(1)
 	})
 }
 
@@ -164,10 +73,7 @@ func TestMetricsWithLabels(t *testing.T) {
 		// All metrics should be properly initialized
 		assert.NotNil(t, em.Height)
 		assert.NotNil(t, em.NumTxs)
-		assert.Len(t, em.ChannelBufferUsage, 6)
-		assert.Len(t, em.ErrorsByType, 5)
 		assert.Len(t, em.OperationDuration, 5)
-		assert.Len(t, em.StateTransitions, 3)
 	})
 
 	t.Run("PrometheusMetricsNoLabels", func(t *testing.T) {
@@ -177,10 +83,7 @@ func TestMetricsWithLabels(t *testing.T) {
 		// All metrics should still be properly initialized
 		assert.NotNil(t, em.Height)
 		assert.NotNil(t, em.NumTxs)
-		assert.Len(t, em.ChannelBufferUsage, 6)
-		assert.Len(t, em.ErrorsByType, 5)
 		assert.Len(t, em.OperationDuration, 5)
-		assert.Len(t, em.StateTransitions, 3)
 	})
 }
 
@@ -196,24 +99,7 @@ func TestMetricsIntegration(t *testing.T) {
 	em.BlockSizeBytes.Set(2048)
 	em.TotalTxs.Set(50000)
 	em.CommittedHeight.Set(999)
-
-	// Test extended metric operations
-	em.DroppedSignals.Add(1)
-	em.RecoverableErrors.Add(1)
-	em.NonRecoverableErrors.Add(1)
-	em.GoroutineCount.Set(50)
-
-	// Test channel metrics
-	em.ChannelBufferUsage["height_in"].Set(5)
-	em.ChannelBufferUsage["header_store"].Set(2)
-	em.ChannelBufferUsage["data_store"].Set(3)
-
-	// Test error metrics
-	em.ErrorsByType["block_production"].Add(1)
-	em.ErrorsByType["da_submission"].Add(2)
-	em.ErrorsByType["sync"].Add(0)
-	em.ErrorsByType["validation"].Add(1)
-	em.ErrorsByType["state_update"].Add(0)
+	em.TxsPerBlock.Observe(100)
 
 	// Test operation duration
 	em.OperationDuration["block_production"].Observe(0.05)
@@ -223,35 +109,12 @@ func TestMetricsIntegration(t *testing.T) {
 	em.OperationDuration["state_update"].Observe(0.01)
 
 	// Test DA metrics
-	em.DASubmissionAttempts.Add(5)
-	em.DASubmissionSuccesses.Add(4)
-	em.DASubmissionFailures.Add(1)
 	em.DARetrievalAttempts.Add(10)
 	em.DARetrievalSuccesses.Add(9)
 	em.DARetrievalFailures.Add(1)
 	em.DAInclusionHeight.Set(995)
 	em.PendingHeadersCount.Set(3)
 	em.PendingDataCount.Set(2)
-
-	// Test sync metrics
-	em.SyncLag.Set(5)
-	em.HeadersSynced.Add(10)
-	em.DataSynced.Add(10)
-	em.BlocksApplied.Add(10)
-	em.InvalidHeadersCount.Add(0)
-
-	// Test block production metrics
-	em.BlockProductionTime.Observe(0.02)
-	em.TxsPerBlock.Observe(100)
-	em.EmptyBlocksProduced.Add(2)
-	em.LazyBlocksProduced.Add(5)
-	em.NormalBlocksProduced.Add(10)
-
-	// Test state transitions
-	em.StateTransitions["pending_to_submitted"].Add(15)
-	em.StateTransitions["submitted_to_included"].Add(14)
-	em.StateTransitions["included_to_finalized"].Add(13)
-	em.InvalidTransitions.Add(0)
 }
 
 func TestMetricsSubsystem(t *testing.T) {
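The surviving tests follow one pattern: assert that each retained field is non-nil, then exercise the no-op implementations to prove they are safe without a Prometheus registry. A condensed, hypothetical version of that pattern (the test name is illustrative; it reuses the file's existing imports):

// Hypothetical condensed test mirroring the assertions above.
func TestRetainedMetricsNop(t *testing.T) {
	em := NopMetrics()
	assert.NotNil(t, em.TxsPerBlock)
	em.Height.Set(100)
	em.TxsPerBlock.Observe(42)
	em.OperationDuration["block_production"].Observe(0.1)
}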
diff --git a/block/internal/executing/executor.go b/block/internal/executing/executor.go
index be969b1a75..fae9a5157f 100644
--- a/block/internal/executing/executor.go
+++ b/block/internal/executing/executor.go
@@ -416,7 +416,6 @@ func (e *Executor) produceBlock() error {
 
 	// Update in-memory state after successful commit
 	e.setLastState(newState)
-	e.metrics.Height.Set(float64(newState.LastBlockHeight))
 
 	// broadcast header and data to P2P network
 	g, ctx := errgroup.WithContext(e.ctx)
@@ -427,7 +426,7 @@ func (e *Executor) produceBlock() error {
 		// don't fail block production on broadcast error
 	}
 
-	e.recordBlockMetrics(data)
+	e.recordBlockMetrics(newState, data)
 
 	e.logger.Info().
 		Uint64("height", newHeight).
@@ -649,12 +648,16 @@ func (e *Executor) sendCriticalError(err error) {
 }
 
 // recordBlockMetrics records metrics for the produced block
-func (e *Executor) recordBlockMetrics(data *types.Data) {
+func (e *Executor) recordBlockMetrics(newState types.State, data *types.Data) {
+	e.metrics.Height.Set(float64(newState.LastBlockHeight))
+
 	if data == nil || data.Metadata == nil {
 		return
 	}
+
 	e.metrics.NumTxs.Set(float64(len(data.Txs)))
 	e.metrics.TotalTxs.Add(float64(len(data.Txs)))
+	e.metrics.TxsPerBlock.Observe(float64(len(data.Txs)))
 	e.metrics.BlockSizeBytes.Set(float64(data.Size()))
 	e.metrics.CommittedHeight.Set(float64(data.Metadata.Height))
 }
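Note that `BlockProductionTime` is among the removed histograms; the retained `OperationDuration["block_production"]` entry carries the same signal. A hedged sketch of feeding it around the producer; the timing wrapper is illustrative, not code from this PR:

// Assumes e.metrics is the common.Metrics value used above.
start := time.Now()
if err := e.produceBlock(); err != nil {
	return err
}
e.metrics.OperationDuration["block_production"].Observe(time.Since(start).Seconds())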
diff --git a/block/internal/submitting/submitter.go b/block/internal/submitting/submitter.go
index 2a34b45084..8af2a76055 100644
--- a/block/internal/submitting/submitter.go
+++ b/block/internal/submitting/submitter.go
@@ -131,11 +131,6 @@ func (s *Submitter) Stop() error {
 	return nil
 }
 
-// updateMetrics updates sync-related metrics
-func (s *Submitter) updateMetrics() {
-	s.metrics.DAInclusionHeight.Set(float64(s.GetDAIncludedHeight()))
-}
-
 // daSubmissionLoop handles submission of headers and data to DA layer (aggregator nodes only)
 func (s *Submitter) daSubmissionLoop() {
 	s.logger.Info().Msg("starting DA submission loop")
@@ -144,9 +139,6 @@ func (s *Submitter) daSubmissionLoop() {
 	ticker := time.NewTicker(s.config.DA.BlockTime.Duration)
 	defer ticker.Stop()
 
-	metricsTicker := time.NewTicker(30 * time.Second)
-	defer metricsTicker.Stop()
-
 	for {
 		select {
 		case <-s.ctx.Done():
@@ -199,8 +191,6 @@ func (s *Submitter) daSubmissionLoop() {
 				}()
 			}
 		}
-		case <-metricsTicker.C:
-			s.updateMetrics()
 		}
 	}
 }
@@ -219,6 +209,7 @@ func (s *Submitter) processDAInclusionLoop() {
 			return
 		case <-ticker.C:
 			currentDAIncluded := s.GetDAIncludedHeight()
+			s.metrics.DAInclusionHeight.Set(float64(s.GetDAIncludedHeight()))
 
 			for {
 				nextHeight := currentDAIncluded + 1
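Moving the `DAInclusionHeight` update into `processDAInclusionLoop` drops the separate 30-second `metricsTicker`; the gauge now refreshes at the loop's own cadence. The resulting shape, condensed and hedged (only the gauge update is taken from the diff, the rest is elided):

// Condensed sketch of the loop after this change.
for {
	select {
	case <-s.ctx.Done():
		return
	case <-ticker.C:
		currentDAIncluded := s.GetDAIncludedHeight()
		s.metrics.DAInclusionHeight.Set(float64(currentDAIncluded))
		// ... advance DA-inclusion processing from currentDAIncluded + 1 ...
	}
}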
diff --git a/sequencers/single/metrics.go b/sequencers/single/metrics.go
deleted file mode 100644
index 546dce51c1..0000000000
--- a/sequencers/single/metrics.go
+++ /dev/null
@@ -1,99 +0,0 @@
-package single
-
-import (
-	"errors"
-
-	"github.com/go-kit/kit/metrics"
-	"github.com/go-kit/kit/metrics/discard"
-	"github.com/go-kit/kit/metrics/prometheus"
-	stdprometheus "github.com/prometheus/client_golang/prometheus"
-)
-
-const (
-	// MetricsSubsystem is a subsystem shared by all metrics exposed by this
-	// package.
-	MetricsSubsystem = "sequencer"
-)
-
-// MetricsProvider returns sequencing Metrics.
-type MetricsProvider func(chainID string) (*Metrics, error)
-
-// DefaultMetricsProvider returns Metrics build using Prometheus client library
-// if Prometheus is enabled. Otherwise, it returns no-op Metrics.
-func DefaultMetricsProvider(enabled bool) MetricsProvider {
-	return func(chainID string) (*Metrics, error) {
-		if enabled {
-			return PrometheusMetrics("chain_id", chainID)
-		}
-		return NopMetrics()
-	}
-}
-
-// Metrics contains metrics exposed by this package.
-type Metrics struct {
-	// GasPrice
-	GasPrice metrics.Gauge
-	// Last submitted blob size
-	LastBlobSize metrics.Gauge
-	// cost / byte
-	// CostPerByte metrics.Gauge
-	// Wallet Balance
-	// WalletBalance metrics.Gauge
-	// Transaction Status
-	TransactionStatus metrics.Counter
-	// Number of pending blocks.
-	NumPendingBlocks metrics.Gauge
-	// Last included block height
-	IncludedBlockHeight metrics.Gauge
-}
-
-// PrometheusMetrics returns Metrics build using Prometheus client library.
-// Optionally, labels can be provided along with their values ("foo",
-// "fooValue").
-func PrometheusMetrics(labelsAndValues ...string) (*Metrics, error) {
-	if len(labelsAndValues)%2 != 0 {
-		return nil, errors.New("uneven number of labels and values; labels and values should be provided in pairs")
-	}
-	labels := []string{}
-	for i := 0; i < len(labelsAndValues); i += 2 {
-		labels = append(labels, labelsAndValues[i])
-	}
-	return &Metrics{
-		GasPrice: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
-			Subsystem: MetricsSubsystem,
-			Name:      "gas_price",
-			Help:      "The gas price of DA.",
-		}, labels).With(labelsAndValues...),
-		LastBlobSize: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
-			Subsystem: MetricsSubsystem,
-			Name:      "last_blob_size",
-			Help:      "The size in bytes of the last DA blob.",
-		}, labels).With(labelsAndValues...),
-		TransactionStatus: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
-			Subsystem: MetricsSubsystem,
-			Name:      "transaction_status",
-			Help:      "Count of transaction statuses for DA submissions",
-		}, append(labels, "status")).With(labelsAndValues...),
-		NumPendingBlocks: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
-			Subsystem: MetricsSubsystem,
-			Name:      "num_pending_blocks",
-			Help:      "The number of pending blocks for DA submission.",
-		}, labels).With(labelsAndValues...),
-		IncludedBlockHeight: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
-			Subsystem: MetricsSubsystem,
-			Name:      "included_block_height",
-			Help:      "The last DA included block height.",
-		}, labels).With(labelsAndValues...),
-	}, nil
-}
-
-// NopMetrics returns no-op Metrics.
-func NopMetrics() (*Metrics, error) {
-	return &Metrics{
-		GasPrice:            discard.NewGauge(),
-		LastBlobSize:        discard.NewGauge(),
-		TransactionStatus:   discard.NewCounter(),
-		NumPendingBlocks:    discard.NewGauge(),
-		IncludedBlockHeight: discard.NewGauge(),
-	}, nil
-}
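Dashboards that scraped the deleted `sequencer_*` series (gas_price, last_blob_size, transaction_status, num_pending_blocks, included_block_height) should move to the retained DA submitter metrics in `block/internal/common`. A hedged sketch of that surface, written as if inside that package; values are fake, and `AllDASubmitterFailureReasons` is the helper already referenced in `NopMetrics` above:

// Sketch only: field names match common.Metrics, everything else is illustrative.
m := NopMetrics()
m.DASubmitterPendingBlobs.Set(12) // backlog awaiting DA submission
m.DASubmitterResends.Add(1)       // resend attempts
for _, reason := range AllDASubmitterFailureReasons() {
	m.DASubmitterFailures[reason].Add(1) // one counter per failure reason
}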
diff --git a/sequencers/single/sequencer.go b/sequencers/single/sequencer.go
index dbc5bc567c..89e5e4601f 100644
--- a/sequencers/single/sequencer.go
+++ b/sequencers/single/sequencer.go
@@ -31,10 +31,7 @@ type Sequencer struct {
 	da coreda.DA
 
 	batchTime time.Duration
-
-	queue *BatchQueue // single queue for immediate availability
-
-	metrics *Metrics
+	queue     *BatchQueue // single queue for immediate availability
 }
 
 // NewSequencer creates a new Single Sequencer
@@ -45,10 +42,9 @@ func NewSequencer(
 	da coreda.DA,
 	id []byte,
 	batchTime time.Duration,
-	metrics *Metrics,
 	proposer bool,
 ) (*Sequencer, error) {
-	return NewSequencerWithQueueSize(ctx, logger, db, da, id, batchTime, metrics, proposer, 1000)
+	return NewSequencerWithQueueSize(ctx, logger, db, da, id, batchTime, proposer, 1000)
 }
 
 // NewSequencerWithQueueSize creates a new Single Sequencer with configurable queue size
@@ -59,7 +55,6 @@ func NewSequencerWithQueueSize(
 	da coreda.DA,
 	id []byte,
 	batchTime time.Duration,
-	metrics *Metrics,
 	proposer bool,
 	maxQueueSize int,
 ) (*Sequencer, error) {
@@ -69,7 +64,6 @@ func NewSequencerWithQueueSize(
 		batchTime: batchTime,
 		Id:        id,
 		queue:     NewBatchQueue(db, "batches", maxQueueSize),
-		metrics:   metrics,
 		proposer:  proposer,
 	}
 
@@ -128,18 +122,6 @@ func (c *Sequencer) GetNextBatch(ctx context.Context, req coresequencer.GetNextB
 	}, nil
 }
 
-// RecordMetrics updates the metrics with the given values.
-// This method is intended to be called by the block manager after submitting data to the DA layer.
-func (c *Sequencer) RecordMetrics(gasPrice float64, blobSize uint64, statusCode coreda.StatusCode, numPendingBlocks uint64, includedBlockHeight uint64) {
-	if c.metrics != nil {
-		c.metrics.GasPrice.Set(gasPrice)
-		c.metrics.LastBlobSize.Set(float64(blobSize))
-		c.metrics.TransactionStatus.With("status", fmt.Sprintf("%d", statusCode)).Add(1)
-		c.metrics.NumPendingBlocks.Set(float64(numPendingBlocks))
-		c.metrics.IncludedBlockHeight.Set(float64(includedBlockHeight))
-	}
-}
-
 // VerifyBatch implements sequencing.Sequencer.
 func (c *Sequencer) VerifyBatch(ctx context.Context, req coresequencer.VerifyBatchRequest) (*coresequencer.VerifyBatchResponse, error) {
 	if !c.isValid(req.Id) {
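Callers that size the queue use the same trimmed constructor, minus the metrics argument. A hedged sketch with illustrative values:

// Sketch of the updated constructor; 500 is an arbitrary queue size.
seq, err := single.NewSequencerWithQueueSize(
	ctx, logger, db, da,
	[]byte("my-chain"),
	time.Second, // batch time
	true,        // proposer
	500,         // max queue size
)
if err != nil {
	return nil, err
}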
diff --git a/sequencers/single/sequencer_test.go b/sequencers/single/sequencer_test.go
index 5362b49040..87d6b47351 100644
--- a/sequencers/single/sequencer_test.go
+++ b/sequencers/single/sequencer_test.go
@@ -1,7 +1,6 @@
 package single
 
 import (
-	"bytes"
 	"context"
 	"errors"
 	"testing"
@@ -21,12 +20,11 @@ import (
 func TestNewSequencer(t *testing.T) {
 	// Create a new sequencer with mock DA client
 	dummyDA := coreda.NewDummyDA(100_000_000, 10*time.Second)
-	metrics, _ := NopMetrics()
 	db := ds.NewMapDatastore()
 	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
 	defer cancel()
 	logger := zerolog.Nop()
-	seq, err := NewSequencer(ctx, logger, db, dummyDA, []byte("test1"), 10*time.Second, metrics, false)
+	seq, err := NewSequencer(ctx, logger, db, dummyDA, []byte("test1"), 10*time.Second, false)
 	if err != nil {
 		t.Fatalf("Failed to create sequencer: %v", err)
 	}
@@ -51,15 +49,13 @@ func TestNewSequencer(t *testing.T) {
 }
 
 func TestSequencer_SubmitBatchTxs(t *testing.T) {
-	// Initialize a new sequencer
-	metrics, _ := NopMetrics()
 	dummyDA := coreda.NewDummyDA(100_000_000, 10*time.Second)
 	db := ds.NewMapDatastore()
 	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
 	defer cancel()
 	Id := []byte("test1")
 	logger := zerolog.Nop()
-	seq, err := NewSequencer(ctx, logger, db, dummyDA, Id, 10*time.Second, metrics, false)
+	seq, err := NewSequencer(ctx, logger, db, dummyDA, Id, 10*time.Second, false)
 	if err != nil {
 		t.Fatalf("Failed to create sequencer: %v", err)
 	}
@@ -104,15 +100,13 @@ func TestSequencer_SubmitBatchTxs(t *testing.T) {
 }
 
 func TestSequencer_SubmitBatchTxs_EmptyBatch(t *testing.T) {
-	// Initialize a new sequencer
-	metrics, _ := NopMetrics()
 	dummyDA := coreda.NewDummyDA(100_000_000, 10*time.Second)
 	db := ds.NewMapDatastore()
 	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
 	defer cancel()
 	Id := []byte("test1")
 	logger := zerolog.Nop()
-	seq, err := NewSequencer(ctx, logger, db, dummyDA, Id, 10*time.Second, metrics, false)
+	seq, err := NewSequencer(ctx, logger, db, dummyDA, Id, 10*time.Second, false)
 	require.NoError(t, err, "Failed to create sequencer")
 	defer func() {
 		err := db.Close()
@@ -376,147 +370,6 @@ func TestSequencer_VerifyBatch(t *testing.T) {
 	})
 }
 
-func TestSequencer_GetNextBatch_BeforeDASubmission(t *testing.T) {
-	t.Skip()
-	// Initialize a new sequencer with mock DA
-	metrics, _ := NopMetrics()
-	mockDA := &damocks.MockDA{}
-	db := ds.NewMapDatastore()
-	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
-	defer cancel()
-	logger := zerolog.Nop()
-	seq, err := NewSequencer(ctx, logger, db, mockDA, []byte("test1"), 1*time.Second, metrics, false)
-	if err != nil {
-		t.Fatalf("Failed to create sequencer: %v", err)
-	}
-	defer func() {
-		err := db.Close()
-		if err != nil {
-			t.Fatalf("Failed to close sequencer: %v", err)
-		}
-	}()
-
-	// Set up mock expectations
-	mockDA.On("Submit", mock.Anything, mock.Anything, mock.Anything, mock.Anything).
-		Return(nil, errors.New("mock DA always rejects submissions"))
-
-	// Submit a batch
-	Id := []byte("test1")
-	tx := []byte("transaction1")
-	res, err := seq.SubmitBatchTxs(context.Background(), coresequencer.SubmitBatchTxsRequest{
-		Id:    Id,
-		Batch: &coresequencer.Batch{Transactions: [][]byte{tx}},
-	})
-	if err != nil {
-		t.Fatalf("Failed to submit transaction: %v", err)
-	}
-	if res == nil {
-		t.Fatal("Expected response to not be nil")
-	}
-	time.Sleep(100 * time.Millisecond)
-
-	// Try to get the batch before DA submission
-	nextBatchResp, err := seq.GetNextBatch(context.Background(), coresequencer.GetNextBatchRequest{Id: Id})
-	if err != nil {
-		t.Fatalf("Failed to get next batch: %v", err)
-	}
-	if len(nextBatchResp.Batch.Transactions) != 1 {
-		t.Fatalf("Expected 1 transaction, got %d", len(nextBatchResp.Batch.Transactions))
-	}
-	if !bytes.Equal(nextBatchResp.Batch.Transactions[0], tx) {
-		t.Fatal("Expected transaction to match submitted transaction")
-	}
-
-	// Verify all mock expectations were met
-	mockDA.AssertExpectations(t)
-}
-
-// TestSequencer_RecordMetrics tests the RecordMetrics method to ensure it properly updates metrics.
-func TestSequencer_RecordMetrics(t *testing.T) {
-	t.Run("With Metrics", func(t *testing.T) {
-		// Create a sequencer with metrics enabled
-		metrics, err := NopMetrics()
-		require.NoError(t, err)
-		logger := zerolog.Nop()
-
-		seq := &Sequencer{
-			logger:  logger,
-			metrics: metrics,
-		}
-
-		// Test values
-		gasPrice := 1.5
-		blobSize := uint64(1024)
-		statusCode := coreda.StatusSuccess
-		numPendingBlocks := uint64(5)
-		includedBlockHeight := uint64(100)
-
-		// Call RecordMetrics - should not panic or error
-		seq.RecordMetrics(gasPrice, blobSize, statusCode, numPendingBlocks, includedBlockHeight)
-
-		// Since we're using NopMetrics (discard metrics), we can't verify the actual values
-		// but we can verify the method doesn't panic and completes successfully
-		assert.NotNil(t, seq.metrics)
-	})
-
-	t.Run("Without Metrics", func(t *testing.T) {
-		// Create a sequencer without metrics
-		logger := zerolog.Nop()
-		seq := &Sequencer{
-			logger:  logger,
-			metrics: nil, // No metrics
-		}
-
-		// Test values
-		gasPrice := 2.0
-		blobSize := uint64(2048)
-		statusCode := coreda.StatusNotIncludedInBlock
-		numPendingBlocks := uint64(3)
-		includedBlockHeight := uint64(200)
-
-		// Call RecordMetrics - should not panic even with nil metrics
-		seq.RecordMetrics(gasPrice, blobSize, statusCode, numPendingBlocks, includedBlockHeight)
-
-		// Verify metrics is still nil
-		assert.Nil(t, seq.metrics)
-	})
-
-	t.Run("With Different Status Codes", func(t *testing.T) {
-		// Create a sequencer with metrics
-		metrics, err := NopMetrics()
-		require.NoError(t, err)
-		logger := zerolog.Nop()
-
-		seq := &Sequencer{
-			logger:  logger,
-			metrics: metrics,
-		}
-
-		// Test different status codes
-		testCases := []struct {
-			name       string
-			statusCode coreda.StatusCode
-		}{
-			{"Success", coreda.StatusSuccess},
-			{"NotIncluded", coreda.StatusNotIncludedInBlock},
-			{"AlreadyInMempool", coreda.StatusAlreadyInMempool},
-			{"TooBig", coreda.StatusTooBig},
-			{"ContextCanceled", coreda.StatusContextCanceled},
-		}
-
-		for _, tc := range testCases {
-			t.Run(tc.name, func(t *testing.T) {
-				// Call RecordMetrics with different status codes
-				seq.RecordMetrics(1.0, 512, tc.statusCode, 2, 50)
-
-				// Verify no panic occurred
-				assert.NotNil(t, seq.metrics)
-			})
-		}
-	})
-
-}
-
 func TestSequencer_QueueLimit_Integration(t *testing.T) {
 	// Test integration between sequencer and queue limits to demonstrate backpressure
 	db := ds.NewMapDatastore()
@@ -648,7 +501,6 @@ func TestSequencer_DAFailureAndQueueThrottling_Integration(t *testing.T) {
 		dummyDA,
 		[]byte("test-chain"),
 		100*time.Millisecond,
-		nil,  // metrics
 		true, // proposer
 		queueSize,
 	)