@@ -150,8 +150,14 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
150150 # (temp reason): we don't support re-routing prefill requests
151151 # (long-term reason): prefill engine should pull from a global queue so there are
152152 # only a few in-flight requests that can be quickly finished
153- generate_endpoint .serve_endpoint (handler .generate , graceful_shutdown = True ),
154- clear_endpoint .serve_endpoint (handler .clear_kv_blocks ),
153+ generate_endpoint .serve_endpoint (
154+ handler .generate ,
155+ graceful_shutdown = True ,
156+ metrics_labels = [("model" , config .model )],
157+ ),
158+ clear_endpoint .serve_endpoint (
159+ handler .clear_kv_blocks , metrics_labels = [("model" , config .model )]
160+ ),
155161 )
156162 except Exception as e :
157163 logger .error (f"Failed to serve endpoints: { e } " )
@@ -178,7 +184,11 @@ async def init(runtime: DistributedRuntime, config: Config):
178184 .client ()
179185 )
180186
181- factory = StatLoggerFactory (component , config .engine_args .data_parallel_rank or 0 )
187+ factory = StatLoggerFactory (
188+ component ,
189+ config .engine_args .data_parallel_rank or 0 ,
190+ metrics_labels = [("model" , config .model )],
191+ )
182192 engine_client , vllm_config , default_sampling_params = setup_vllm_engine (
183193 config , factory
184194 )
@@ -239,8 +249,14 @@ async def init(runtime: DistributedRuntime, config: Config):
239249 await asyncio .gather (
240250 # for decode, we want to transfer the in-flight requests to other decode engines,
241251 # because waiting for them to finish can take a long time for long OSLs
242- generate_endpoint .serve_endpoint (handler .generate , graceful_shutdown = False ),
243- clear_endpoint .serve_endpoint (handler .clear_kv_blocks ),
252+ generate_endpoint .serve_endpoint (
253+ handler .generate ,
254+ graceful_shutdown = False ,
255+ metrics_labels = [("model" , config .model )],
256+ ),
257+ clear_endpoint .serve_endpoint (
258+ handler .clear_kv_blocks , metrics_labels = [("model" , config .model )]
259+ ),
244260 )
245261 except Exception as e :
246262 logger .error (f"Failed to serve endpoints: { e } " )
0 commit comments