diff --git a/python/sglang/multimodal_gen/runtime/loader/fsdp_load.py b/python/sglang/multimodal_gen/runtime/loader/fsdp_load.py index ae40df3f15f6..3ca0592be320 100644 --- a/python/sglang/multimodal_gen/runtime/loader/fsdp_load.py +++ b/python/sglang/multimodal_gen/runtime/loader/fsdp_load.py @@ -269,7 +269,7 @@ def load_model_from_full_model_state_dict( meta_sharded_param.placements, ) if cpu_offload: - sharded_tensor = sharded_tensor.to("cpu", pin_memory=True) + sharded_tensor = sharded_tensor.to("cpu") sharded_sd[target_param_name] = nn.Parameter(sharded_tensor) model.reverse_param_names_mapping = reverse_param_names_mapping