diff --git a/examples/monitoring/grafana/dashboards/json/sglang-dashboard.json b/examples/monitoring/grafana/dashboards/json/sglang-dashboard.json index d4a022d08bf..ebd663e4135 100644 --- a/examples/monitoring/grafana/dashboards/json/sglang-dashboard.json +++ b/examples/monitoring/grafana/dashboards/json/sglang-dashboard.json @@ -147,7 +147,7 @@ "hide": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "P95", + "legendFormat": "P50", "range": true, "refId": "C", "useBackend": false @@ -164,7 +164,7 @@ "hide": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "P50", + "legendFormat": "Avg", "range": true, "refId": "D", "useBackend": false @@ -393,7 +393,7 @@ "hide": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "P95", + "legendFormat": "P50", "range": true, "refId": "C", "useBackend": false @@ -405,12 +405,12 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "avg(rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval]) / rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval]))\r\n", + "expr": "avg(rate(sglang:time_to_first_token_seconds_sum[$__rate_interval]) / rate(sglang:time_to_first_token_seconds_count[$__rate_interval]))\r\n", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "P50", + "legendFormat": "Avg", "range": true, "refId": "D", "useBackend": false diff --git a/python/sglang/srt/mem_cache/hiradix_cache.py b/python/sglang/srt/mem_cache/hiradix_cache.py index f939fff4b2c..d21077eca55 100644 --- a/python/sglang/srt/mem_cache/hiradix_cache.py +++ b/python/sglang/srt/mem_cache/hiradix_cache.py @@ -113,6 +113,7 @@ def write_backup(self, node: TreeNode, write_back=False): ) if host_indices is not None: node.host_value = host_indices + assert len(node.host_value) > 0 self.ongoing_write_through[node.id] = node if not write_back: # no need to lock nodes if write back