From e667bb6ba9afe0728dba5893f52b6ca0e9214ad2 Mon Sep 17 00:00:00 2001
From: jenchen13
Date: Fri, 31 Oct 2025 11:29:57 -0700
Subject: [PATCH] add ep in ptq

Signed-off-by: jenchen13
---
Reviewer note: this copy of the patch was recovered from a version whose
line breaks and leading whitespace had been collapsed onto a single line.
The indentation of the hunk lines below is reconstructed by convention and
is unverified -- check it against the target files, or apply with
`git apply --ignore-whitespace`.

 nemo/collections/llm/api.py | 2 ++
 scripts/llm/ptq.py          | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/nemo/collections/llm/api.py b/nemo/collections/llm/api.py
index 3cbf6583acf4..29ad1f6deabb 100644
--- a/nemo/collections/llm/api.py
+++ b/nemo/collections/llm/api.py
@@ -471,6 +471,7 @@ def ptq(
     export_config: ExportConfig,
     calibration_tp: int = 1,
     calibration_pp: int = 1,
+    calibration_ep: int = 1,
     num_layers_in_first_pipeline_stage: int | None = None,
     num_layers_in_last_pipeline_stage: int | None = None,
     devices: int | None = None,
@@ -558,6 +559,7 @@ def ptq(
         pipeline_model_parallel_size=calibration_pp,
         num_layers_in_first_pipeline_stage=num_layers_in_first_pipeline_stage,
         num_layers_in_last_pipeline_stage=num_layers_in_last_pipeline_stage,
+        expert_model_parallel_size=calibration_ep,
         devices=devices,
         num_nodes=num_nodes,
         inference_only=True,
diff --git a/scripts/llm/ptq.py b/scripts/llm/ptq.py
index f7ee5f37d4a4..1b8bf84b16ab 100644
--- a/scripts/llm/ptq.py
+++ b/scripts/llm/ptq.py
@@ -34,6 +34,7 @@ def get_args():
     )
     parser.add_argument("--decoder_type", type=str, help="Decoder type for TensorRT-Model-Optimizer")
     parser.add_argument("-ctp", "--calibration_tp", "--calib_tp", type=int, default=1)
+    parser.add_argument("-cep", "--calibration_ep", "--calib_ep", type=int, default=1)
     parser.add_argument("-cpp", "--calibration_pp", "--calib_pp", type=int, default=1)
     parser.add_argument(
         "--num_layers_in_first_pipeline_stage",
@@ -167,6 +168,7 @@ def main():
         export_config=export_config,
         calibration_tp=args.calibration_tp,
         calibration_pp=args.calibration_pp,
+        calibration_ep=args.calibration_ep,
         num_layers_in_first_pipeline_stage=args.num_layers_in_first_pipeline_stage,
         num_layers_in_last_pipeline_stage=args.num_layers_in_last_pipeline_stage,
         devices=args.devices,