
Commit cb07f13

janekl and Hainan Xu authored and Hainan Xu committed
Update PTQ tests and ModelOpt version (NVIDIA#11095)
* Deprecate NeMo 1 PTQ tests except FP8

Signed-off-by: Jan Lasek <[email protected]>

* Convert model right before testing it for FP8 PTQ

Signed-off-by: Jan Lasek <[email protected]>

* Bump modelopt version

Signed-off-by: Jan Lasek <[email protected]>

---------

Signed-off-by: Jan Lasek <[email protected]>
Signed-off-by: Hainan Xu <[email protected]>
1 parent c082504 commit cb07f13

3 files changed: +10 −66 lines changed

.github/workflows/cicd-main.yml

+8 −64

@@ -495,27 +495,20 @@ jobs:
 #       - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
 #         if: "failure()"

-  L2_PTQ_Llama2_Export_Only:
-    needs: [cicd-test-container-setup]
-    uses: ./.github/workflows/_test_template.yml
-    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_PTQ_Llama2_Export_Only') || needs.cicd-test-container-setup.outputs.all == 'true'
-    with:
-      RUNNER: self-hosted-azure
-      SCRIPT: |
-        python examples/nlp/language_modeling/megatron_gpt_ptq.py \
-            model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
-            quantization.algorithm=null \
-            export.save_path=/tmp/nlp_megatron_llama_export_only/ci_baseline
-
   L2_PTQ_Llama2_FP8:
     needs: [cicd-test-container-setup]
     uses: ./.github/workflows/_test_template.yml
     if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_PTQ_Llama2_FP8') || needs.cicd-test-container-setup.outputs.all == 'true'
     with:
       RUNNER: self-hosted-azure
       SCRIPT: |
+        CUDA_VISIBLE_DEVICES=0 python scripts/checkpoint_converters/convert_llama_hf_to_nemo.py \
+            --input_name_or_path=/home/TestData/nlp/megatron_llama/llama-ci-hf-tiny \
+            --output_path=/tmp/nlp_megatron_llama/llama_ci.nemo \
+            --precision=16
+
         python examples/nlp/language_modeling/megatron_gpt_ptq.py \
-            model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
+            model.restore_from_path=/tmp/nlp_megatron_llama/llama_ci.nemo \
             model.tensor_model_parallel_size=2 \
             trainer.devices=2 \
             quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
@@ -526,55 +519,8 @@ jobs:
             export.sample_output=False \
             export.save_path=/tmp/nlp_megatron_llama_eo/ci_fp8.qnemo

-  L2_PTQ_Llama2_INT8_SQ:
-    needs: [cicd-test-container-setup]
-    uses: ./.github/workflows/_test_template.yml
-    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_PTQ_Llama2_INT8_SQ') || needs.cicd-test-container-setup.outputs.all == 'true'
-    with:
-      RUNNER: self-hosted-azure
-      TIMEOUT: 15
-      SCRIPT: |
-        python examples/nlp/language_modeling/megatron_gpt_ptq.py \
-            model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
-            quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
-            quantization.algorithm=int8_sq \
-            quantization.num_calib_size=8 \
-            inference.batch_size=2 \
-            export.sample_output=False \
-            export.save_path=/tmp/nlp_megatron_llama_eo/ci_int8_sq.qnemo
-
-  # TODO: investigate int4_awq stuck issues and restore the test
-  #L2_PTQ_Llama2_INT4_AWQ:
-  #  needs: [cicd-test-container-setup]
-  #  runs-on: self-hosted-azure
-  #  timeout-minutes: 10
-  #  container:
-  #    image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
-  #    options:
-  #      # --user 0:128
-  #      --device=/dev/nvidia0
-  #      --gpus all
-  #      --shm-size=8g
-  #      --env TRANSFORMERS_OFFLINE=0
-  #      --env HYDRA_FULL_ERROR=1
-  #      --volume /mnt/datadrive/TestData:/home/TestData
-  #  steps:
-  #    - name: Checkout repository
-  #      uses: actions/checkout@v4
-  #    - run: |
-  #        python examples/nlp/language_modeling/megatron_gpt_ptq.py \
-  #            model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
-  #            model.tensor_model_parallel_size=1 \
-  #            trainer.devices=1 \
-  #            quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
-  #            quantization.algorithm=int4_awq \
-  #            quantization.num_calib_size=8 \
-  #            inference.batch_size=2 \
-  #            export.save_path=/home/TestData/nlp/megatron_llama/ci_int4_awq.qnemo
-  #
-  #        rm -rf /home/TestData/nlp/megatron_llama/ci_int4_awq.qnemo
-  #- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
-  #  if: "failure()"
+      AFTER_SCRIPT: |
+        rm -rf /tmp/nlp_megatron_llama_eo/ci_fp8.qnemo

   # OPTIONAL_L2_QAT_Llama2_INT4:
   #   needs: [cicd-test-container-setup]
@@ -4477,10 +4423,8 @@ jobs:
           - L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2
           - L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED
           - L2_NeMo_2_Mixtral_Pretraining
-          - L2_PTQ_Llama2_INT8_SQ
           - L2_PTQ_Llama2_FP8
           - L2_Community_LLM_Checkpoints_tests_Llama3
-          - L2_PTQ_Llama2_Export_Only
           - L2_Distill_Llama2
           - L2_Prune_Width_Llama2
           - L2_Speech_to_Text_AED
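
The net effect of this change is that the FP8 job now builds its own .nemo checkpoint from the tiny HF Llama right before quantizing, and removes the exported .qnemo afterwards. Below is a minimal sketch of that flow run outside CI; it assumes the same /home/TestData mounts as the self-hosted runner, and quantization.algorithm=fp8 is an assumption here, since the options elided between the two hunks are not visible in this diff.

```bash
# Sketch of the updated FP8 PTQ flow outside CI (paths/mounts assumed; not the verbatim job script).
set -e

# Step 1: convert the tiny HF Llama checkpoint to .nemo right before the test.
CUDA_VISIBLE_DEVICES=0 python scripts/checkpoint_converters/convert_llama_hf_to_nemo.py \
    --input_name_or_path=/home/TestData/nlp/megatron_llama/llama-ci-hf-tiny \
    --output_path=/tmp/nlp_megatron_llama/llama_ci.nemo \
    --precision=16

# Step 2: run PTQ on the freshly converted checkpoint and export a .qnemo artifact.
# quantization.algorithm=fp8 is assumed; the diff does not show the options between the two hunks.
python examples/nlp/language_modeling/megatron_gpt_ptq.py \
    model.restore_from_path=/tmp/nlp_megatron_llama/llama_ci.nemo \
    model.tensor_model_parallel_size=2 \
    trainer.devices=2 \
    quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
    quantization.algorithm=fp8 \
    export.sample_output=False \
    export.save_path=/tmp/nlp_megatron_llama_eo/ci_fp8.qnemo

# Step 3: clean up the exported artifact, mirroring the new AFTER_SCRIPT.
rm -rf /tmp/nlp_megatron_llama_eo/ci_fp8.qnemo
```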

Dockerfile.ci

+1 −1

@@ -52,7 +52,7 @@ RUN pip install nemo_run@git+https://github.com/NVIDIA/NeMo-Run.git@${NEMO_RUN_T

 # Install NeMo requirements
 ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea
-ARG MODELOPT_VERSION=0.17.0
+ARG MODELOPT_VERSION=0.19.0
 ARG MCORE_TAG=213c8a23fa9fe95d19eff0932a1e6e71767f0962

 ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c
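
A quick, generic way to confirm the new pin took effect inside a built image is to query the installed package. This check is not part of the commit, and the top-level `modelopt` module name is assumed from how nvidia-modelopt is typically packaged.

```bash
# Verify the ModelOpt pin inside the CI image (module name assumed).
pip show nvidia-modelopt | grep -i '^version'
python -c "import modelopt; print(modelopt.__version__)"   # expect a 0.19.x release
```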

docs/source/starthere/intro.rst

+1 −1

@@ -102,7 +102,7 @@ This final step involves installing the TensorRT Model Optimizer package.

 .. code-block:: bash

-    pip install nvidia-modelopt[torch]~=0.17.0 --extra-index-url https://pypi.nvidia.com
+    pip install nvidia-modelopt[torch]~=0.19.0 --extra-index-url https://pypi.nvidia.com


 .. code-block:: bash
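
After running the updated install command from the docs, a one-line smoke test can confirm the torch quantization entry point imports cleanly. This is illustrative only; the `modelopt.torch.quantization` module path is an assumption rather than something stated in this commit.

```bash
# Illustrative post-install check (assumed module path).
python -c "import modelopt.torch.quantization as mtq; print('nvidia-modelopt[torch] import OK')"
```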
