diff --git a/vllm_ascend/patch/platform/patch_minimax_usage_accounting.py b/vllm_ascend/patch/platform/patch_minimax_usage_accounting.py
index 6b7ad2ed741..902b90a6b69 100644
--- a/vllm_ascend/patch/platform/patch_minimax_usage_accounting.py
+++ b/vllm_ascend/patch/platform/patch_minimax_usage_accounting.py
@@ -300,6 +300,7 @@ async def _wrapped_chat_completion_stream_generator(
     tokenizer,
     request_metadata: engine_protocol.RequestResponseMetadata,
     reasoning_parser=None,
+    **extra_kwargs: Any,
 ):
     num_choices = 1 if request.n is None else request.n
     state = _create_usage_tracking_state(num_choices, reasoning_parser)
@@ -314,6 +315,7 @@ async def _wrapped_chat_completion_stream_generator(
         tokenizer,
         request_metadata,
         reasoning_parser,
+        **extra_kwargs,
     ):
         yield _inject_stream_usage_details(data, state)
 