From 9fa784f642565c9be0dcf0174dac923022cae1bc Mon Sep 17 00:00:00 2001 From: spypsy <6403450+spypsy@users.noreply.github.com> Date: Fri, 20 Feb 2026 14:18:33 +0000 Subject: [PATCH] chore(metrics): add L1 inclusion timing metrics to archiver Fixes [A-567](https://linear.app/aztec-labs/issue/A-567/metrics-on-l1-tx-timings-within-l2-slots) --- .../grafana/dashboards/aztec_network.json | 498 ++++++++++++++++++ .../archiver/src/modules/instrumentation.ts | 20 + .../archiver/src/modules/l1_synchronizer.ts | 8 + yarn-project/telemetry-client/src/metrics.ts | 7 + 4 files changed, 533 insertions(+) diff --git a/spartan/metrics/grafana/dashboards/aztec_network.json b/spartan/metrics/grafana/dashboards/aztec_network.json index 7b037701c3a8..8f0399e66798 100644 --- a/spartan/metrics/grafana/dashboards/aztec_network.json +++ b/spartan/metrics/grafana/dashboards/aztec_network.json @@ -2676,6 +2676,504 @@ ], "title": "Archiver Sync Duration (P95)", "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "Distribution of checkpoint L1 inclusion delays by time bucket (seconds into L2 slot)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "percent" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green", + "value": 0 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*0-5s.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*5-10s.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*10-15s.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*15-20s.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*20-30s.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*30\\+s.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 64 + }, + "id": 40, + "interval": "1m", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "expr": "(\n sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"5\"}[$__rate_interval])) by (le)\n - ignoring(le) sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", 
le=\"0\"}[$__rate_interval]))\n)\n/\nsum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_count{k8s_namespace_name=~\"$namespace\"}[$__rate_interval]))", + "legendFormat": "0-5s", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "expr": "(\n sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"10\"}[$__rate_interval])) by (le)\n - ignoring(le) sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"5\"}[$__rate_interval]))\n)\n/\nsum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_count{k8s_namespace_name=~\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "5-10s", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "expr": "(\n sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"15\"}[$__rate_interval])) by (le)\n - ignoring(le) sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"10\"}[$__rate_interval]))\n)\n/\nsum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_count{k8s_namespace_name=~\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "10-15s", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "expr": "(\n sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"20\"}[$__rate_interval])) by (le)\n - ignoring(le) sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"15\"}[$__rate_interval]))\n)\n/\nsum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_count{k8s_namespace_name=~\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "15-20s", + 
"range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "expr": "(\n sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"30\"}[$__rate_interval])) by (le)\n - ignoring(le) sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"20\"}[$__rate_interval]))\n)\n/\nsum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_count{k8s_namespace_name=~\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "20-30s", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "expr": "(\n sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_count{k8s_namespace_name=~\"$namespace\"}[$__rate_interval]))\n - sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"30\"}[$__rate_interval]))\n)\n/\nsum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_count{k8s_namespace_name=~\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "30+s", + "range": true, + "refId": "F" + } + ], + "title": "Checkpoint L1 Inclusion Delay Distribution", + "type": "timeseries" + }, + { + "datasource": { + "default": true, + "type": "prometheus", + "uid": "${data_source}" + }, + "description": "Absolute count of checkpoints by L1 inclusion delay bucket", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 60, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 0, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "cps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*0-5s.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*5-10s.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*10-15s.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*15-20s.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*20-30s.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*30\\+s.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 72 + }, + "id": 41, + "interval": "1m", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.3.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "expr": 
"sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"5\"}[$__rate_interval])) by (le)\n- ignoring(le) sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"0\"}[$__rate_interval]))", + "legendFormat": "0-5s", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "expr": "sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"10\"}[$__rate_interval])) by (le)\n- ignoring(le) sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"5\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "5-10s", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "expr": "sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"15\"}[$__rate_interval])) by (le)\n- ignoring(le) sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"10\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "10-15s", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "expr": "sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"20\"}[$__rate_interval])) by (le)\n- ignoring(le) sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"15\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "15-20s", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "expr": "sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", 
le=\"30\"}[$__rate_interval])) by (le)\n- ignoring(le) sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"20\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "20-30s", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${data_source}" + }, + "editorMode": "code", + "expr": "sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_count{k8s_namespace_name=~\"$namespace\"}[$__rate_interval]))\n- sum(rate(aztec_archiver_checkpoint_l1_inclusion_delay_bucket{k8s_namespace_name=~\"$namespace\", le=\"30\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "30+s", + "range": true, + "refId": "F" + } + ], + "title": "Checkpoint L1 Inclusion Delay Count", + "type": "timeseries" } ], "preload": false, diff --git a/yarn-project/archiver/src/modules/instrumentation.ts b/yarn-project/archiver/src/modules/instrumentation.ts index fbf91cf16a1a..8f08ddeb3541 100644 --- a/yarn-project/archiver/src/modules/instrumentation.ts +++ b/yarn-project/archiver/src/modules/instrumentation.ts @@ -1,6 +1,9 @@ +import type { SlotNumber } from '@aztec/foundation/branded-types'; import { createLogger } from '@aztec/foundation/log'; import type { L2Block } from '@aztec/stdlib/block'; import type { CheckpointData } from '@aztec/stdlib/checkpoint'; +import type { L1RollupConstants } from '@aztec/stdlib/epoch-helpers'; +import { getTimestampForSlot } from '@aztec/stdlib/epoch-helpers'; import { Attributes, type Gauge, @@ -38,6 +41,8 @@ export class ArchiverInstrumentation { private blockProposalTxTargetCount: UpDownCounter; + private checkpointL1InclusionDelay: Histogram; + private log = createLogger('archiver:instrumentation'); private constructor( @@ -85,6 +90,8 @@ export class ArchiverInstrumentation { }, ); + this.checkpointL1InclusionDelay = meter.createHistogram(Metrics.ARCHIVER_CHECKPOINT_L1_INCLUSION_DELAY); + this.dbMetrics = new LmdbMetrics( meter, { @@ -161,4 +168,17 @@ export class 
ArchiverInstrumentation { [Attributes.L1_BLOCK_PROPOSAL_USED_TRACE]: usedTrace, }); } + + /** + * Records L1 inclusion timing for a checkpoint observed on L1 (seconds into the L2 slot). + */ + public processCheckpointL1Timing(data: { + slotNumber: SlotNumber; + l1Timestamp: bigint; + l1Constants: Pick<L1RollupConstants, 'l1GenesisTime' | 'slotDuration'>; + }): void { + const slotStartTs = getTimestampForSlot(data.slotNumber, data.l1Constants); + const inclusionDelaySeconds = Number(data.l1Timestamp - slotStartTs); + this.checkpointL1InclusionDelay.record(inclusionDelaySeconds); + } } diff --git a/yarn-project/archiver/src/modules/l1_synchronizer.ts b/yarn-project/archiver/src/modules/l1_synchronizer.ts index 22b1ed5aba29..c61e378c8eaa 100644 --- a/yarn-project/archiver/src/modules/l1_synchronizer.ts +++ b/yarn-project/archiver/src/modules/l1_synchronizer.ts @@ -803,6 +803,14 @@ export class ArchiverL1Synchronizer implements Traceable { ); } + for (const published of validCheckpoints) { + this.instrumentation.processCheckpointL1Timing({ + slotNumber: published.checkpoint.header.slotNumber, + l1Timestamp: published.l1.timestamp, + l1Constants: this.l1Constants, + }); + } + try { const updatedValidationResult = rollupStatus.validationResult === initialValidationResult ? 
undefined : rollupStatus.validationResult; diff --git a/yarn-project/telemetry-client/src/metrics.ts b/yarn-project/telemetry-client/src/metrics.ts index 0187114e0d5d..f3152c3653b5 100644 --- a/yarn-project/telemetry-client/src/metrics.ts +++ b/yarn-project/telemetry-client/src/metrics.ts @@ -350,6 +350,13 @@ export const ARCHIVER_BLOCK_PROPOSAL_TX_TARGET_COUNT: MetricDefinition = { valueType: ValueType.INT, }; +export const ARCHIVER_CHECKPOINT_L1_INCLUSION_DELAY: MetricDefinition = { + name: 'aztec.archiver.checkpoint_l1_inclusion_delay', + description: 'Seconds into the L2 slot when the checkpoint L1 tx was included', + unit: 's', + valueType: ValueType.INT, +}; + export const NODE_RECEIVE_TX_DURATION: MetricDefinition = { name: 'aztec.node.receive_tx.duration', description: 'The duration of the receiveTx method',