From 6c867b8ea4de92acf2931d7ff98403bbfc10c681 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Mon, 21 Jul 2025 21:21:58 +0100 Subject: [PATCH 01/11] Update weight_utils.py Signed-off-by: bbartels --- .../model_loader/weight_utils.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 64a2089921ee..950654dbdf8b 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -491,6 +491,24 @@ def runai_safetensors_weights_iterator( streamer.stream_file(st_file) yield from streamer.get_tensors() +def runai_safetensors_weights_iterator( + hf_weights_files: List[str], + use_tqdm_on_load: bool, +) -> Generator[tuple[str, torch.Tensor], None, None]: + """Iterate over the weights in the model safetensor files.""" + with SafetensorsStreamer() as streamer: + streamer.stream_files(hf_weights_files) + + tensor_iter = tqdm( + streamer.get_tensors(), + total=len(hf_weights_files), + desc="Loading safetensors using Runai Model Streamer", + bar_format=_BAR_FORMAT, + disable=not use_tqdm_on_load, + ) + + for name, tensor in tensor_iter: + yield name, tensor def fastsafetensors_weights_iterator( hf_weights_files: list[str], From 9bd544b535eef8f6cde86aa54e6e726e044ec395 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Mon, 21 Jul 2025 21:30:39 +0100 Subject: [PATCH 02/11] Update weight_utils.py Signed-off-by: bbartels --- vllm/model_executor/model_loader/weight_utils.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 950654dbdf8b..5c6145e675f4 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -475,22 +475,6 @@ def safetensors_weights_iterator( param = f.get_tensor(name) yield name, param - -def runai_safetensors_weights_iterator( - hf_weights_files: list[str], - use_tqdm_on_load: bool, -) -> Generator[tuple[str, torch.Tensor], None, None]: - """Iterate over the weights in the model safetensor files.""" - with SafetensorsStreamer() as streamer: - for st_file in tqdm( - hf_weights_files, - desc="Loading safetensors using Runai Model Streamer", - disable=not enable_tqdm(use_tqdm_on_load), - bar_format=_BAR_FORMAT, - ): - streamer.stream_file(st_file) - yield from streamer.get_tensors() - def runai_safetensors_weights_iterator( hf_weights_files: List[str], use_tqdm_on_load: bool, From 33f4124d3264e138eff8637dab8045d40cf1af75 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Mon, 21 Jul 2025 21:31:54 +0100 Subject: [PATCH 03/11] Update weight_utils.py Signed-off-by: bbartels --- vllm/model_executor/model_loader/weight_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 5c6145e675f4..c65220ab8768 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -485,7 +485,6 @@ def runai_safetensors_weights_iterator( tensor_iter = tqdm( streamer.get_tensors(), - total=len(hf_weights_files), desc="Loading safetensors using Runai Model Streamer", bar_format=_BAR_FORMAT, disable=not use_tqdm_on_load, From f60db2ff3226d28f21357d46316ab33cac017ad6 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Mon, 21 Jul 2025 21:55:03 +0100 Subject: [PATCH 04/11] Update weight_utils.py Signed-off-by: bbartels --- vllm/model_executor/model_loader/weight_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index c65220ab8768..66d21b00bd7e 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -482,9 +482,13 @@ def runai_safetensors_weights_iterator( """Iterate over the weights in the model safetensor files.""" with SafetensorsStreamer() as streamer: streamer.stream_files(hf_weights_files) - + total_tensors = sum( + len(tensors_meta) + for tensors_meta in streamer.files_to_tensors_metadata.values() + ) tensor_iter = tqdm( streamer.get_tensors(), + total=total_tensors, desc="Loading safetensors using Runai Model Streamer", bar_format=_BAR_FORMAT, disable=not use_tqdm_on_load, From 176acb67427151916252ee14aa767cc22a1e5e2b Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Mon, 21 Jul 2025 21:55:38 +0100 Subject: [PATCH 05/11] Update weight_utils.py Signed-off-by: bbartels --- vllm/model_executor/model_loader/weight_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 66d21b00bd7e..a31a1696379e 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -476,7 +476,7 @@ def safetensors_weights_iterator( yield name, param def runai_safetensors_weights_iterator( - hf_weights_files: List[str], + hf_weights_files: list[str], use_tqdm_on_load: bool, ) -> Generator[tuple[str, torch.Tensor], None, None]: """Iterate over the weights in the model safetensor files.""" From 87654b4264160cfba5e827be40f9b09e91e36e39 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Mon, 21 Jul 2025 22:03:46 +0100 Subject: [PATCH 06/11] Update weight_utils.py Signed-off-by: bbartels --- vllm/model_executor/model_loader/weight_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index a31a1696379e..88680c4f4f1b 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -494,8 +494,7 @@ def runai_safetensors_weights_iterator( disable=not use_tqdm_on_load, ) - for name, tensor in tensor_iter: - yield name, tensor + yield from tensor_iter def fastsafetensors_weights_iterator( hf_weights_files: list[str], From 07d5348c764bdf6c181826bf2537dcbebaa1f439 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Mon, 21 Jul 2025 22:05:14 +0100 Subject: [PATCH 07/11] Update setup.py Signed-off-by: bbartels --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9a5ca3456a0f..b8e625f8c92d 100644 --- a/setup.py +++ b/setup.py @@ -659,7 +659,7 @@ def _read_requirements(filename: str) -> list[str]: "bench": ["pandas", "datasets"], "tensorizer": ["tensorizer==2.10.1"], "fastsafetensors": ["fastsafetensors >= 0.1.10"], - "runai": ["runai-model-streamer", "runai-model-streamer-s3", "boto3"], + "runai": ["runai-model-streamer >= 0.13.3", "runai-model-streamer-s3", "boto3"], "audio": ["librosa", "soundfile", "mistral_common[audio]"], # Required for audio processing "video": [] # Kept for backwards compatibility From cd92c115f5c1824bd7ca924c7c7efbdf0989dd44 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Mon, 21 Jul 2025 22:38:26 +0100 Subject: [PATCH 08/11] Update weight_utils.py Signed-off-by: bbartels --- vllm/model_executor/model_loader/weight_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 88680c4f4f1b..b385a90160cf 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -484,8 +484,8 @@ def runai_safetensors_weights_iterator( streamer.stream_files(hf_weights_files) total_tensors = sum( len(tensors_meta) - for tensors_meta in streamer.files_to_tensors_metadata.values() - ) + for tensors_meta in streamer.files_to_tensors_metadata.values()) + tensor_iter = tqdm( streamer.get_tensors(), total=total_tensors, From ad67e1046753067413bea82231a5e234c944f32a Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Mon, 21 Jul 2025 23:08:27 +0100 Subject: [PATCH 09/11] Update setup.py Signed-off-by: bbartels --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b8e625f8c92d..d46e678e7aa4 100644 --- a/setup.py +++ b/setup.py @@ -659,7 +659,8 @@ def _read_requirements(filename: str) -> list[str]: "bench": ["pandas", "datasets"], "tensorizer": ["tensorizer==2.10.1"], "fastsafetensors": ["fastsafetensors >= 0.1.10"], - "runai": ["runai-model-streamer >= 0.13.3", "runai-model-streamer-s3", "boto3"], + "runai": + ["runai-model-streamer >= 0.13.3", "runai-model-streamer-s3", "boto3"], "audio": ["librosa", "soundfile", "mistral_common[audio]"], # Required for audio processing "video": [] # Kept for backwards compatibility From 4b183c4e164e9ee4ef8afaa47287f4f35185e5de Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Mon, 21 Jul 2025 23:23:11 +0100 Subject: [PATCH 10/11] Update weight_utils.py Signed-off-by: bbartels --- vllm/model_executor/model_loader/weight_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index b385a90160cf..958efd770901 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -475,6 +475,7 @@ def safetensors_weights_iterator( param = f.get_tensor(name) yield name, param + def runai_safetensors_weights_iterator( hf_weights_files: list[str], use_tqdm_on_load: bool, @@ -496,6 +497,7 @@ def runai_safetensors_weights_iterator( yield from tensor_iter + def fastsafetensors_weights_iterator( hf_weights_files: list[str], use_tqdm_on_load: bool, From 7ca0a8a942323fa1dd048bbc76516e1c35bd4a4d Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Tue, 22 Jul 2025 15:59:34 +0100 Subject: [PATCH 11/11] Update vllm/model_executor/model_loader/weight_utils.py Co-authored-by: Cyrus Leung Signed-off-by: bbartels --- vllm/model_executor/model_loader/weight_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index 958efd770901..074126fa669e 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -492,7 +492,7 @@ def runai_safetensors_weights_iterator( total=total_tensors, desc="Loading safetensors using Runai Model Streamer", bar_format=_BAR_FORMAT, - disable=not use_tqdm_on_load, + disable=not enable_tqdm(use_tqdm_on_load), ) yield from tensor_iter