Skip to content

Commit f6b39fa

Browse files
committed
init
Signed-off-by: NickLucche <[email protected]>
1 parent 577d498 commit f6b39fa

File tree

1 file changed

+127
-0
lines changed

1 file changed

+127
-0
lines changed

vllm/v1/metrics/loggers.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,104 @@ def __init__(
804804
],
805805
)
806806

807+
#
808+
# KVConnector metrics
809+
#
810+
self._nixl_metrics_enabled = False
811+
if (
812+
kv_transfer_config := vllm_config.kv_transfer_config
813+
) and kv_transfer_config.kv_connector == "NixlConnector":
814+
self._nixl_metrics_enabled = True
815+
buckets = [
816+
0.001,
817+
0.005,
818+
0.01,
819+
0.025,
820+
0.05,
821+
0.075,
822+
0.1,
823+
0.2,
824+
0.3,
825+
0.5,
826+
0.75,
827+
1.0,
828+
5.0,
829+
]
830+
nixl_histogram_xfer_time = self._histogram_cls(
831+
name="vllm:nixl_xfer_time_seconds",
832+
documentation="Histogram of transfer duration for NIXL KV"
833+
" Cache transfers.",
834+
buckets=buckets,
835+
labelnames=labelnames,
836+
)
837+
self.nixl_histogram_xfer_time = make_per_engine(
838+
nixl_histogram_xfer_time, engine_indexes, model_name
839+
)
840+
nixl_histogram_post_time = self._histogram_cls(
841+
name="vllm:nixl_post_time_seconds",
842+
documentation="Histogram of transfer post time for NIXL KV"
843+
" Cache transfers.",
844+
buckets=buckets[1:],
845+
labelnames=labelnames,
846+
)
847+
self.nixl_histogram_post_time = make_per_engine(
848+
nixl_histogram_post_time, engine_indexes, model_name
849+
)
850+
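# power-of-two byte buckets, presumably meant to span ~2KiB to ~8GiB; NOTE(review): `2**10 + i` evaluates as 1024 + i (1025..1047), likely intended 2 ** (10 + i) - confirm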
851+
buckets = [2**10 + i for i in range(1, 24, 2)]
852+
nixl_histogram_bytes_transferred = self._histogram_cls(
853+
name="vllm:nixl_bytes_transferred",
854+
documentation="Histogram of bytes transferred per NIXL KV"
855+
" Cache transfers.",
856+
buckets=buckets,
857+
labelnames=labelnames,
858+
)
859+
self.nixl_histogram_bytes_transferred = make_per_engine(
860+
nixl_histogram_bytes_transferred, engine_indexes, model_name
861+
)
862+
buckets = [
863+
10,
864+
20,
865+
30,
866+
50,
867+
75,
868+
100,
869+
200,
870+
400,
871+
1000,
872+
2000,
873+
4000,
874+
10000,
875+
20000,
876+
50000,
877+
]
878+
nixl_histogram_num_descriptors = self._histogram_cls(
879+
name="vllm:nixl_num_descriptors",
880+
documentation="Histogram of number of descriptors per NIXL"
881+
" KV Cache transfers.",
882+
buckets=buckets,
883+
labelnames=labelnames,
884+
)
885+
self.nixl_histogram_num_descriptors = make_per_engine(
886+
nixl_histogram_num_descriptors, engine_indexes, model_name
887+
)
888+
counter_nixl_num_failed_transfers = self._counter_cls(
889+
name="vllm:nixl_num_failed_transfers",
890+
documentation="Number of failed NIXL KV Cache transfers.",
891+
labelnames=labelnames,
892+
)
893+
self.counter_nixl_num_failed_transfers = make_per_engine(
894+
counter_nixl_num_failed_transfers, engine_indexes, model_name
895+
)
896+
counter_nixl_num_failed_notifications = self._counter_cls(
897+
name="vllm:nixl_num_failed_notifications",
898+
documentation="Number of failed NIXL KV Cache notifications.",
899+
labelnames=labelnames,
900+
)
901+
self.counter_nixl_num_failed_notifications = make_per_engine(
902+
counter_nixl_num_failed_notifications, engine_indexes, model_name
903+
)
904+
807905
def log_metrics_info(self, type: str, config_obj: SupportsMetricsInfo):
808906
metrics_info = config_obj.metrics_info()
809907
metrics_info["engine"] = ""
@@ -869,6 +967,35 @@ def record(
869967
self.spec_decoding_prom.observe(
870968
scheduler_stats.spec_decoding_stats, engine_idx
871969
)
970+
# TODO: factor this out into an out-of-tree (OOT) metrics class
971+
if self._nixl_metrics_enabled and (
972+
kv_stats := scheduler_stats.kv_connector_stats
973+
):
974+
for prom_obj, list_item_key in zip(
975+
[
976+
self.nixl_histogram_xfer_time,
977+
self.nixl_histogram_post_time,
978+
self.nixl_histogram_bytes_transferred,
979+
self.nixl_histogram_num_descriptors,
980+
],
981+
[
982+
"transfer_duration",
983+
"post_duration",
984+
"bytes_transferred",
985+
"num_descriptors",
986+
],
987+
):
988+
for list_item in kv_stats[list_item_key]:
989+
prom_obj[engine_idx].observe(list_item)
990+
for counter_obj, counter_item_key in zip(
991+
[
992+
self.counter_nixl_num_failed_transfers,
993+
self.counter_nixl_num_failed_notifications,
994+
],
995+
["num_failed_transfers", "num_failed_notifications"],
996+
):
997+
for list_item in kv_stats[counter_item_key]:
998+
counter_obj[engine_idx].inc(list_item)
872999

8731000
if mm_cache_stats is not None:
8741001
self.counter_mm_cache_queries[engine_idx].inc(mm_cache_stats.queries)

0 commit comments

Comments
 (0)