Skip to content

Commit 8e5d33a

Browse files
committed
minor fix
Signed-off-by: Tian Zheng <[email protected]>
1 parent 4b8eb90 commit 8e5d33a

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

tensorrt_llm/_torch/pyexecutor/resource_manager.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -508,8 +508,9 @@ def update_resources(self, scheduled_batch: ScheduledRequests):
508508
def free_resources(self, request: LlmRequest):
509509
self.impl.remove_sequence(request.py_request_id, request)
510510

511+
@staticmethod
511512
def calculate_scaling_factor_size_bytes(
512-
self, cache_size: int, quant_vector_size: int,
513+
cache_size: int, quant_vector_size: int,
513514
scaling_factor_dtype: DataType) -> int:
514515
assert cache_size % quant_vector_size == 0, "NVFP4 cache size must be divisible by quant vector size"
515516
return get_size_in_bytes(cache_size // quant_vector_size,
@@ -733,7 +734,7 @@ def calculate_cache_size_per_token(layers: Set[int]) -> int:
733734
cache_size_bytes_per_token = get_size_in_bytes(
734735
cache_size_per_token, dtype)
735736
if dtype == DataType.NVFP4:
736-
cache_size_bytes_per_token += self.calculate_scaling_factor_size_bytes(
737+
cache_size_bytes_per_token += KVCacheManager.calculate_scaling_factor_size_bytes(
737738
cache_size_per_token,
738739
quant_vector_size=16,
739740
scaling_factor_dtype=DataType.FP8)
@@ -766,7 +767,7 @@ def calculate_cache_size_per_token(layers: Set[int]) -> int:
766767
cache_size_bytes_per_token = get_size_in_bytes(
767768
cache_size_per_token, dtype)
768769
if dtype == DataType.NVFP4:
769-
cache_size_bytes_per_token += self.calculate_scaling_factor_size_bytes(
770+
cache_size_bytes_per_token += KVCacheManager.calculate_scaling_factor_size_bytes(
770771
cache_size_per_token,
771772
quant_vector_size=16,
772773
scaling_factor_dtype=DataType.FP8)

0 commit comments

Comments (0)