diff --git a/fastdeploy/model_executor/utils.py b/fastdeploy/model_executor/utils.py
index 8e90fb80fd..daf0c10f72 100644
--- a/fastdeploy/model_executor/utils.py
+++ b/fastdeploy/model_executor/utils.py
@@ -199,3 +199,32 @@ def temporary_dtype(dtype: str):
         yield
     finally:
         paddle.set_default_dtype(orig_dtype)
+
+
+def is_paddle_support_v1_loader() -> bool:
+    """Probe whether in-place copies into tensor slices reach the parent tensor.
+
+    A zero tensor of shape [1, 32, 64] is filled half by half: for every index
+    along its first axis, the lower and the upper half of the last axis are
+    sliced out and overwritten with a [32, 32] tensor of ones via ``copy_``.
+    The target is never written directly, so it can only end up all ones if
+    those slice writes are reflected in it.
+
+    Returns:
+        True if every element of the target tensor equals 1 after the slice
+        copies, False otherwise.
+    """
+    src_shape = [32, 32]
+    tgt_shape = [1, 32, 64]
+    src_tensor = paddle.ones(src_shape, dtype="float32")
+    tgt_tensor = paddle.zeros(tgt_shape, dtype="float32")
+    for exp_id in range(tgt_shape[0]):
+        # gate: first half of the last axis
+        gate_tgt = tgt_tensor[exp_id][..., : tgt_shape[2] // 2]
+        gate_tgt.copy_(src_tensor, False)
+        # up: second half of the last axis
+        up_tgt = tgt_tensor[exp_id][..., tgt_shape[2] // 2 :]
+        up_tgt.copy_(src_tensor, False)
+    # The target started as zeros, so all ones means both slice writes landed.
+    is_same = bool(paddle.all(tgt_tensor == 1))
+    return is_same
diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py
index 2b2ed9a591..43384f8ba2 100644
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -43,6 +43,7 @@
 from fastdeploy.inter_communicator import EngineWorkerQueue as TaskQueue
 from fastdeploy.inter_communicator import ExistTaskStatus, IPCSignal, ModelWeightsStatus
 from fastdeploy.model_executor.layers.quantization import get_quantization_config
+from fastdeploy.model_executor.utils import is_paddle_support_v1_loader
 from fastdeploy.platforms import current_platform
 from fastdeploy.utils import get_logger, parse_quantization
 from fastdeploy.worker.worker_base import WorkerBase
@@ -809,6 +810,9 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
     update_fd_config_for_mm(fd_config)
     update_think_end_id_for_ernie(fd_config)
 
+    if load_config.load_choices == "default_v1" and not is_paddle_support_v1_loader():
+        raise ValueError("The install Paddle don't support v1 loader.")
+
     return fd_config
 
 