diff --git a/examples/monitoring/grafana/dashboards/json/sglang-dashboard.json b/examples/monitoring/grafana/dashboards/json/sglang-dashboard.json index 4463ac503b02..ebd663e41354 100644 --- a/examples/monitoring/grafana/dashboards/json/sglang-dashboard.json +++ b/examples/monitoring/grafana/dashboards/json/sglang-dashboard.json @@ -109,7 +109,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(sglang_e2e_request_latency_seconds_bucket[$__rate_interval])))\r\n", + "expr": "histogram_quantile(0.99, sum by (le) (rate(sglang:e2e_request_latency_seconds_bucket[$__rate_interval])))\r\n", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -125,7 +125,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(sglang_e2e_request_latency_seconds_bucket[$__rate_interval])))\r\n", + "expr": "histogram_quantile(0.9, sum by (le) (rate(sglang:e2e_request_latency_seconds_bucket[$__rate_interval])))\r\n", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, @@ -142,7 +142,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(sglang_e2e_request_latency_seconds_bucket[$__rate_interval])))\r\n", + "expr": "histogram_quantile(0.5, sum by (le) (rate(sglang:e2e_request_latency_seconds_bucket[$__rate_interval])))\r\n", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, @@ -159,7 +159,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "avg(rate(sglang_e2e_request_latency_seconds_sum[$__rate_interval]) / rate(sglang_e2e_request_latency_seconds_count[$__rate_interval]))\r\n", + "expr": "avg(rate(sglang:e2e_request_latency_seconds_sum[$__rate_interval]) / rate(sglang:e2e_request_latency_seconds_count[$__rate_interval]))\r\n", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, @@ -253,7 +253,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(increase(sglang_e2e_request_latency_seconds_bucket{model_name=~\"$model_name\"}[$__rate_interval])) by (le)\r\n", + "expr": "sum(increase(sglang:e2e_request_latency_seconds_bucket{model_name=~\"$model_name\"}[$__rate_interval])) by (le)\r\n", "format": "heatmap", "fullMetaSearch": false, "includeNullMetadata": true, @@ -355,7 +355,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(sglang_time_to_first_token_seconds_bucket[$__rate_interval])))\r\n", + "expr": "histogram_quantile(0.99, sum by (le) (rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval])))\r\n", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -371,7 +371,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "histogram_quantile(0.9, sum by (le) (rate(sglang_time_to_first_token_seconds_bucket[$__rate_interval])))\r\n", + "expr": "histogram_quantile(0.9, sum by (le) (rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval])))\r\n", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, @@ -388,7 +388,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum by (le) (rate(sglang_time_to_first_token_seconds_bucket[$__rate_interval])))\r\n", + "expr": "histogram_quantile(0.5, sum by (le) (rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval])))\r\n", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, @@ -405,7 +405,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "avg(rate(sglang_time_to_first_token_seconds_sum[$__rate_interval]) / rate(sglang_time_to_first_token_seconds_count[$__rate_interval]))\r\n", + "expr": "avg(rate(sglang:time_to_first_token_seconds_sum[$__rate_interval]) / rate(sglang:time_to_first_token_seconds_count[$__rate_interval]))\r\n", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, @@ -503,7 +503,7 @@ "disableTextWrap": false, "editorMode": "builder", "exemplar": false, - "expr": "sum by(le) (increase(sglang_time_to_first_token_seconds_bucket{model_name=~\"$model_name\"}[$__rate_interval]))", + "expr": "sum by(le) (increase(sglang:time_to_first_token_seconds_bucket{model_name=~\"$model_name\"}[$__rate_interval]))", "format": "heatmap", "fullMetaSearch": false, "includeNullMetadata": true, @@ -606,7 +606,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "sglang_num_running_reqs", + "expr": "sglang:num_running_reqs", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -707,7 +707,7 @@ "uid": "ddyfngn31dg5cf" }, "editorMode": "code", - "expr": "sglang_gen_throughput", + "expr": "sglang:gen_throughput", "instant": false, "legendFormat": "{{instance}}", "range": true, @@ -805,7 +805,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "sglang_cache_hit_rate", + "expr": "sglang:cache_hit_rate", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -906,7 +906,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "sglang_num_queue_reqs", + "expr": "sglang:num_queue_reqs", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false,