@@ -804,6 +804,104 @@ def __init__(
804804 ],
805805 )
806806
807+ #
808+ # KVConnector metrics
809+ #
810+ self ._nixl_metrics_enabled = False
811+ if (
812+ kv_transfer_config := vllm_config .kv_transfer_config
813+ ) and kv_transfer_config .kv_connector == "NixlConnector" :
814+ self ._nixl_metrics_enabled = True
815+ buckets = [
816+ 0.001 ,
817+ 0.005 ,
818+ 0.01 ,
819+ 0.025 ,
820+ 0.05 ,
821+ 0.075 ,
822+ 0.1 ,
823+ 0.2 ,
824+ 0.3 ,
825+ 0.5 ,
826+ 0.75 ,
827+ 1.0 ,
828+ 5.0 ,
829+ ]
830+ nixl_histogram_xfer_time = self ._histogram_cls (
831+ name = "vllm:nixl_xfer_time_seconds" ,
832+ documentation = "Histogram of transfer duration for NIXL KV"
833+ " Cache transfers." ,
834+ buckets = buckets ,
835+ labelnames = labelnames ,
836+ )
837+ self .nixl_histogram_xfer_time = make_per_engine (
838+ nixl_histogram_xfer_time , engine_indexes , model_name
839+ )
840+ nixl_histogram_post_time = self ._histogram_cls (
841+ name = "vllm:nixl_post_time_seconds" ,
842+ documentation = "Histogram of transfer post time for NIXL KV"
843+ " Cache transfers." ,
844+ buckets = buckets [1 :],
845+ labelnames = labelnames ,
846+ )
847+ self .nixl_histogram_post_time = make_per_engine (
848+ nixl_histogram_post_time , engine_indexes , model_name
849+ )
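850+ # Intended: exponentially spaced buckets covering roughly the 2kb to 16gb range. FIXME: "2 ** 10 + i" below parses as (2 ** 10) + i, i.e. 1025..1047 bytes; presumably 2 ** (10 + i) was meant (2 KiB..8 GiB) -- confirm and fix.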
851+ buckets = [2 ** 10 + i for i in range (1 , 24 , 2 )]
852+ nixl_histogram_bytes_transferred = self ._histogram_cls (
853+ name = "vllm:nixl_bytes_transferred" ,
854+ documentation = "Histogram of bytes transferred per NIXL KV"
855+ " Cache transfers." ,
856+ buckets = buckets ,
857+ labelnames = labelnames ,
858+ )
859+ self .nixl_histogram_bytes_transferred = make_per_engine (
860+ nixl_histogram_bytes_transferred , engine_indexes , model_name
861+ )
862+ buckets = [
863+ 10 ,
864+ 20 ,
865+ 30 ,
866+ 50 ,
867+ 75 ,
868+ 100 ,
869+ 200 ,
870+ 400 ,
871+ 1000 ,
872+ 2000 ,
873+ 4000 ,
874+ 10000 ,
875+ 20000 ,
876+ 50000 ,
877+ ]
878+ nixl_histogram_num_descriptors = self ._histogram_cls (
879+ name = "vllm:nixl_num_descriptors" ,
880+ documentation = "Histogram of number of descriptors per NIXL"
881+ " KV Cache transfers." ,
882+ buckets = buckets ,
883+ labelnames = labelnames ,
884+ )
885+ self .nixl_histogram_num_descriptors = make_per_engine (
886+ nixl_histogram_num_descriptors , engine_indexes , model_name
887+ )
888+ counter_nixl_num_failed_transfers = self ._counter_cls (
889+ name = "vllm:nixl_num_failed_transfers" ,
890+ documentation = "Number of failed NIXL KV Cache transfers." ,
891+ labelnames = labelnames ,
892+ )
893+ self .counter_nixl_num_failed_transfers = make_per_engine (
894+ counter_nixl_num_failed_transfers , engine_indexes , model_name
895+ )
896+ counter_nixl_num_failed_notifications = self ._counter_cls (
897+ name = "vllm:nixl_num_failed_notifications" ,
898+ documentation = "Number of failed NIXL KV Cache notifications." ,
899+ labelnames = labelnames ,
900+ )
901+ self .counter_nixl_num_failed_notifications = make_per_engine (
902+ counter_nixl_num_failed_notifications , engine_indexes , model_name
903+ )
904+
807905 def log_metrics_info (self , type : str , config_obj : SupportsMetricsInfo ):
808906 metrics_info = config_obj .metrics_info ()
809907 metrics_info ["engine" ] = ""
@@ -869,6 +967,35 @@ def record(
869967 self .spec_decoding_prom .observe (
870968 scheduler_stats .spec_decoding_stats , engine_idx
871969 )
970+ # TODO factor this out into OOT metrics class
971+ if self ._nixl_metrics_enabled and (
972+ kv_stats := scheduler_stats .kv_connector_stats
973+ ):
974+ for prom_obj , list_item_key in zip (
975+ [
976+ self .nixl_histogram_xfer_time ,
977+ self .nixl_histogram_post_time ,
978+ self .nixl_histogram_bytes_transferred ,
979+ self .nixl_histogram_num_descriptors ,
980+ ],
981+ [
982+ "transfer_duration" ,
983+ "post_duration" ,
984+ "bytes_transferred" ,
985+ "num_descriptors" ,
986+ ],
987+ ):
988+ for list_item in kv_stats [list_item_key ]:
989+ prom_obj [engine_idx ].observe (list_item )
990+ for counter_obj , counter_item_key in zip (
991+ [
992+ self .counter_nixl_num_failed_transfers ,
993+ self .counter_nixl_num_failed_notifications ,
994+ ],
995+ ["num_failed_transfers" , "num_failed_notifications" ],
996+ ):
997+ for list_item in kv_stats [counter_item_key ]:
998+ counter_obj [engine_idx ].inc (list_item )
872999
8731000 if mm_cache_stats is not None :
8741001 self .counter_mm_cache_queries [engine_idx ].inc (mm_cache_stats .queries )
0 commit comments