diff --git a/crates/node/engine/src/metrics/mod.rs b/crates/node/engine/src/metrics/mod.rs index 4ede3e14eb..406c42e33b 100644 --- a/crates/node/engine/src/metrics/mod.rs +++ b/crates/node/engine/src/metrics/mod.rs @@ -72,6 +72,10 @@ impl Metrics { /// Identifier for the counter that tracks the number of times the engine has been reset. pub const ENGINE_RESET_COUNT: &str = "kona_node_engine_reset_count"; + /// Identifier for the gauge that tracks the number of inflight engine tasks by type. + /// This metric helps monitor task queue backlog and identify bottlenecks. + pub const ENGINE_INFLIGHT_TASKS: &str = "kona_node_engine_inflight_tasks"; + /// Initializes metrics for the engine. /// /// This does two things: @@ -106,6 +110,13 @@ impl Metrics { metrics::Unit::Count, "Engine reset count" ); + + // Engine inflight tasks gauge + metrics::describe_gauge!( + Self::ENGINE_INFLIGHT_TASKS, + metrics::Unit::Count, + "Number of inflight engine tasks by type" + ); } /// Initializes metrics to `0` so they can be queried immediately by consumers of prometheus @@ -125,5 +136,23 @@ impl Metrics { // Engine reset count kona_macros::set!(counter, Self::ENGINE_RESET_COUNT, 0); + + // Engine inflight tasks (initialize all task types to 0) + kona_macros::set!(gauge, Self::ENGINE_INFLIGHT_TASKS, "type", Self::INSERT_TASK_LABEL, 0.0); + kona_macros::set!( + gauge, + Self::ENGINE_INFLIGHT_TASKS, + "type", + Self::CONSOLIDATE_TASK_LABEL, + 0.0 + ); + kona_macros::set!(gauge, Self::ENGINE_INFLIGHT_TASKS, "type", Self::BUILD_TASK_LABEL, 0.0); + kona_macros::set!( + gauge, + Self::ENGINE_INFLIGHT_TASKS, + "type", + Self::FINALIZE_TASK_LABEL, + 0.0 + ); } } diff --git a/crates/node/engine/src/task_queue/core.rs b/crates/node/engine/src/task_queue/core.rs index ef6e15c5a4..d7c66b9adb 100644 --- a/crates/node/engine/src/task_queue/core.rs +++ b/crates/node/engine/src/task_queue/core.rs @@ -1,10 +1,11 @@ //! The [`Engine`] is a task queue that receives and executes [`EngineTask`]s. use super::EngineTaskExt; +#[cfg(feature = "metrics")] +use crate::Metrics; use crate::{ EngineClient, EngineState, EngineSyncStateUpdate, EngineTask, EngineTaskError, - EngineTaskErrorSeverity, Metrics, SynchronizeTask, SynchronizeTaskError, - task_queue::EngineTaskErrors, + EngineTaskErrorSeverity, SynchronizeTask, SynchronizeTaskError, task_queue::EngineTaskErrors, }; use alloy_provider::Provider; use alloy_rpc_types_eth::Transaction; @@ -69,6 +70,11 @@ impl Engine { /// Enqueues a new [`EngineTask`] for execution. /// Updates the queue length and notifies listeners of the change. pub fn enqueue(&mut self, task: EngineTask) { + // Increment the inflight task metric for this task type + #[cfg(feature = "metrics")] + metrics::gauge!(Metrics::ENGINE_INFLIGHT_TASKS, "type" => task.task_metrics_label()) + .increment(1.0); + self.tasks.push(task); self.task_queue_length.send_replace(self.tasks.len()); } @@ -146,6 +152,15 @@ impl Engine { /// Clears the task queue. pub fn clear(&mut self) { + // Decrement metrics for each task being cleared + #[cfg(feature = "metrics")] + { + while let Some(task) = self.tasks.pop() { + metrics::gauge!(Metrics::ENGINE_INFLIGHT_TASKS, "type" => task.task_metrics_label()).decrement(1.0); + } + } + + #[cfg(not(feature = "metrics"))] self.tasks.clear(); } @@ -162,7 +177,15 @@ impl Engine { self.state_sender.send_replace(self.state); // Pop the task from the queue now that it's been executed. - self.tasks.pop(); + let completed_task = self.tasks.pop().expect("Task should exist since we peeked it"); + + // Decrement the inflight task metric for this task type + #[cfg(feature = "metrics")] + metrics::gauge!(Metrics::ENGINE_INFLIGHT_TASKS, "type" => completed_task.task_metrics_label()).decrement(1.0); + + // Avoid unused variable warning when metrics feature is disabled + #[cfg(not(feature = "metrics"))] + let _ = completed_task; self.task_queue_length.send_replace(self.tasks.len()); } diff --git a/crates/node/engine/src/task_queue/tasks/task.rs b/crates/node/engine/src/task_queue/tasks/task.rs index a9120a5afb..b5c6f52f06 100644 --- a/crates/node/engine/src/task_queue/tasks/task.rs +++ b/crates/node/engine/src/task_queue/tasks/task.rs @@ -110,7 +110,9 @@ impl EngineTask { Ok(()) } - const fn task_metrics_label(&self) -> &'static str { + /// Returns the metrics label for this task type. + /// Used for categorizing inflight task metrics by task type. + pub const fn task_metrics_label(&self) -> &'static str { match self { Self::Insert(_) => crate::Metrics::INSERT_TASK_LABEL, Self::Consolidate(_) => crate::Metrics::CONSOLIDATE_TASK_LABEL, diff --git a/docker/recipes/kona-node/grafana/dashboards/overview.json b/docker/recipes/kona-node/grafana/dashboards/overview.json index a4eb0a89c9..7477542691 100644 --- a/docker/recipes/kona-node/grafana/dashboards/overview.json +++ b/docker/recipes/kona-node/grafana/dashboards/overview.json @@ -1676,6 +1676,103 @@ "title": "L1 Reorganization Count", "transparent": true, "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Inflight Tasks", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 85 + }, + "id": 26, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "kona_node_engine_inflight_tasks{instance=~\"$instance\"}", + "instant": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Engine Inflight Tasks", + "transparent": true, + "type": "timeseries" } ], "title": "Engine",