 from vllm.v1.engine.parallel_sampling import ParentRequest
 from vllm.v1.engine.processor import Processor
 from vllm.v1.executor.abstract import Executor
-from vllm.v1.metrics.loggers import (StatLoggerBase, StatLoggerFactory,
-                                     setup_default_loggers)
+from vllm.v1.metrics.loggers import StatLoggerFactory, StatLoggerManager
 from vllm.v1.metrics.prometheus import shutdown_prometheus
-from vllm.v1.metrics.stats import IterationStats, SchedulerStats
+from vllm.v1.metrics.stats import IterationStats
 
 logger = init_logger(__name__)
 
@@ -95,14 +94,6 @@ def __init__(
         self.log_requests = log_requests
         self.log_stats = log_stats
 
-        # Set up stat loggers; independent set for each DP rank.
-        self.stat_loggers: list[list[StatLoggerBase]] = setup_default_loggers(
-            vllm_config=vllm_config,
-            log_stats=self.log_stats,
-            engine_num=vllm_config.parallel_config.data_parallel_size,
-            custom_stat_loggers=stat_loggers,
-        )
-
         # Tokenizer (+ ensure liveness if running in another process).
         self.tokenizer = init_tokenizer_from_configs(
             model_config=vllm_config.model_config,
@@ -121,17 +112,24 @@ def __init__(
                                                log_stats=self.log_stats)
 
         # EngineCore (starts the engine in background process).
-
         self.engine_core = EngineCoreClient.make_async_mp_client(
             vllm_config=vllm_config,
             executor_class=executor_class,
             log_stats=self.log_stats,
             client_addresses=client_addresses,
             client_index=client_index,
         )
-        if self.stat_loggers:
-            for stat_logger in self.stat_loggers[0]:
-                stat_logger.log_engine_initialized()
+
+        # Loggers.
+        self.logger_manager: Optional[StatLoggerManager] = None
+        if self.log_stats:
+            self.logger_manager = StatLoggerManager(
+                vllm_config=vllm_config,
+                engine_idxs=self.engine_core.engine_ranks,
+                custom_stat_loggers=stat_loggers,
+            )
+            self.logger_manager.log_engine_initialized()
+
         self.output_handler: Optional[asyncio.Task] = None
         try:
             # Start output handler eagerly if we are in the asyncio eventloop.
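Note: the hunk above replaces the per-rank self.stat_loggers list-of-lists with a single StatLoggerManager keyed by engine index. The sketch below is a rough approximation of what such a manager could look like, not vLLM's actual implementation; the factory(vllm_config, engine_idx) call convention is an assumption, while record(), log(), and log_engine_initialized() mirror the calls visible elsewhere in this diff.

# Illustrative sketch only -- not vLLM's actual StatLoggerManager.
class SketchStatLoggerManager:

    def __init__(self, vllm_config, engine_idxs, custom_stat_loggers=None):
        factories = custom_stat_loggers or []
        # One independent set of loggers per engine (DP rank), keyed by index.
        self.per_engine = {
            idx: [factory(vllm_config, idx) for factory in factories]
            for idx in engine_idxs
        }

    def log_engine_initialized(self):
        for loggers in self.per_engine.values():
            for stat_logger in loggers:
                stat_logger.log_engine_initialized()

    def record(self, engine_idx, scheduler_stats, iteration_stats):
        # Fan out one engine's per-step stats to that engine's loggers.
        for stat_logger in self.per_engine[engine_idx]:
            stat_logger.record(scheduler_stats=scheduler_stats,
                               iteration_stats=iteration_stats)

    def log(self):
        # Periodic logging across all engines (see do_log_stats below).
        for loggers in self.per_engine.values():
            for stat_logger in loggers:
                stat_logger.log()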
@@ -370,7 +368,7 @@ def _run_output_handler(self):
         engine_core = self.engine_core
         output_processor = self.output_processor
         log_stats = self.log_stats
-        stat_loggers = self.stat_loggers if log_stats else None
+        logger_manager = self.logger_manager
 
         async def output_handler():
             try:
@@ -410,9 +408,9 @@ async def output_handler():
                     # 4) Logging.
                     # TODO(rob): make into a coroutine and launch it in
                     # background thread once Prometheus overhead is non-trivial.
-                    if stat_loggers:
-                        AsyncLLM._record_stats(
-                            stat_loggers[outputs.engine_index],
+                    if logger_manager:
+                        logger_manager.record(
+                            engine_idx=outputs.engine_index,
                             scheduler_stats=outputs.scheduler_stats,
                             iteration_stats=iteration_stats,
                         )
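Note: the per-iteration logging path now hands the engine index straight to logger_manager.record(), which makes the AsyncLLM._record_stats static helper (removed in the next hunk) unnecessary. Below is a hedged example of a custom logger that would receive these calls; the three method names follow the calls shown in this diff, while the (vllm_config, engine_idx) constructor signature is an assumption about how a StatLoggerFactory instantiates loggers.

# Hedged example of a custom per-engine stat logger; constructor signature assumed.
class IterationCountLogger:

    def __init__(self, vllm_config, engine_idx: int):
        self.engine_idx = engine_idx
        self.iterations = 0

    def log_engine_initialized(self):
        # Called once via StatLoggerManager.log_engine_initialized().
        print(f"engine {self.engine_idx}: initialized")

    def record(self, scheduler_stats=None, iteration_stats=None):
        # Called from the output handler for each EngineCoreOutputs batch.
        if iteration_stats is not None:
            self.iterations += 1

    def log(self):
        # Called periodically through do_log_stats() -> StatLoggerManager.log().
        print(f"engine {self.engine_idx}: {self.iterations} iterations recorded")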
@@ -431,18 +429,6 @@ async def abort(self, request_id: str) -> None:
         if self.log_requests:
             logger.info("Aborted request %s.", request_id)
 
-    @staticmethod
-    def _record_stats(
-        stat_loggers: list[StatLoggerBase],
-        scheduler_stats: Optional[SchedulerStats],
-        iteration_stats: Optional[IterationStats],
-    ):
-        """static so that it can be used from the output_handler task
-        without a circular ref to AsyncLLM."""
-        for stat_logger in stat_loggers:
-            stat_logger.record(scheduler_stats=scheduler_stats,
-                               iteration_stats=iteration_stats)
-
     async def encode(
         self,
         prompt: PromptType,
@@ -547,9 +533,8 @@ async def do_log_stats(
         scheduler_outputs=None,
         model_output=None,
     ) -> None:
-        for loggers in self.stat_loggers:
-            for stat_logger in loggers:
-                stat_logger.log()
+        if self.logger_manager:
+            self.logger_manager.log()
 
     async def check_health(self) -> None:
         logger.debug("Called check_health.")
@@ -653,18 +638,16 @@ async def scale_elastic_ep(self,
             new_data_parallel_size
 
         # recreate stat loggers
-        if new_data_parallel_size > old_data_parallel_size:
-            stat_loggers: list[list[StatLoggerBase]] = setup_default_loggers(
+        if new_data_parallel_size > old_data_parallel_size and self.log_stats:
+            # TODO(rob): fix this after talking with Ray team.
+            # This resets all the prometheus metrics since we
+            # unregister during initialization. Need to understand
+            # the intended behavior here better.
+            self.logger_manager = StatLoggerManager(
                 vllm_config=self.vllm_config,
-                log_stats=self.log_stats,
-                engine_num=new_data_parallel_size,
+                engine_idxs=list(range(new_data_parallel_size)),
                 custom_stat_loggers=None,
             )
-            num_new_engines = len(stat_loggers) - len(self.stat_loggers)
-            self.stat_loggers.extend(stat_loggers[-num_new_engines:])
-        else:
-            for _ in range(old_data_parallel_size - new_data_parallel_size):
-                self.stat_loggers.pop()
 
     @property
     def is_running(self) -> bool:
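Note: for completeness, a hypothetical wiring sketch. Factories passed as stat_loggers when constructing AsyncLLM flow into the custom_stat_loggers argument of the StatLoggerManager created in __init__ above. The from_engine_args entry point and its stat_loggers keyword are assumptions about the public surface and are not shown in this diff; IterationCountLogger refers to the sketch class from the earlier example.

# Hypothetical usage sketch (entry point and keyword are assumptions).
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.v1.engine.async_llm import AsyncLLM


def iteration_count_factory(vllm_config, engine_idx: int):
    # Matches the assumed factory(vllm_config, engine_idx) convention used above.
    return IterationCountLogger(vllm_config, engine_idx)


engine_args = AsyncEngineArgs(model="facebook/opt-125m")
async_llm = AsyncLLM.from_engine_args(engine_args,
                                      stat_loggers=[iteration_count_factory])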