@@ -495,27 +495,20 @@ jobs:
   #   - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
   #     if: "failure()"

-  L2_PTQ_Llama2_Export_Only:
-    needs: [cicd-test-container-setup]
-    uses: ./.github/workflows/_test_template.yml
-    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_PTQ_Llama2_Export_Only') || needs.cicd-test-container-setup.outputs.all == 'true'
-    with:
-      RUNNER: self-hosted-azure
-      SCRIPT: |
-        python examples/nlp/language_modeling/megatron_gpt_ptq.py \
-            model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
-            quantization.algorithm=null \
-            export.save_path=/tmp/nlp_megatron_llama_export_only/ci_baseline
-
   L2_PTQ_Llama2_FP8:
     needs: [cicd-test-container-setup]
     uses: ./.github/workflows/_test_template.yml
     if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_PTQ_Llama2_FP8') || needs.cicd-test-container-setup.outputs.all == 'true'
     with:
       RUNNER: self-hosted-azure
       SCRIPT: |
+        CUDA_VISIBLE_DEVICES=0 python scripts/checkpoint_converters/convert_llama_hf_to_nemo.py \
+            --input_name_or_path=/home/TestData/nlp/megatron_llama/llama-ci-hf-tiny \
+            --output_path=/tmp/nlp_megatron_llama/llama_ci.nemo \
+            --precision=16
+
         python examples/nlp/language_modeling/megatron_gpt_ptq.py \
-            model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
+            model.restore_from_path=/tmp/nlp_megatron_llama/llama_ci.nemo \
             model.tensor_model_parallel_size=2 \
             trainer.devices=2 \
             quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
@@ -526,55 +519,8 @@ jobs:
         export.sample_output=False \
         export.save_path=/tmp/nlp_megatron_llama_eo/ci_fp8.qnemo

-  L2_PTQ_Llama2_INT8_SQ:
-    needs: [cicd-test-container-setup]
-    uses: ./.github/workflows/_test_template.yml
-    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_PTQ_Llama2_INT8_SQ') || needs.cicd-test-container-setup.outputs.all == 'true'
-    with:
-      RUNNER: self-hosted-azure
-      TIMEOUT: 15
-      SCRIPT: |
-        python examples/nlp/language_modeling/megatron_gpt_ptq.py \
-            model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
-            quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
-            quantization.algorithm=int8_sq \
-            quantization.num_calib_size=8 \
-            inference.batch_size=2 \
-            export.sample_output=False \
-            export.save_path=/tmp/nlp_megatron_llama_eo/ci_int8_sq.qnemo
-
-  # TODO: investigate int4_awq stuck issues and restore the test
-  # L2_PTQ_Llama2_INT4_AWQ:
-  #   needs: [cicd-test-container-setup]
-  #   runs-on: self-hosted-azure
-  #   timeout-minutes: 10
-  #   container:
-  #     image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
-  #     options:
-  #       # --user 0:128
-  #       --device=/dev/nvidia0
-  #       --gpus all
-  #       --shm-size=8g
-  #       --env TRANSFORMERS_OFFLINE=0
-  #       --env HYDRA_FULL_ERROR=1
-  #       --volume /mnt/datadrive/TestData:/home/TestData
-  #   steps:
-  #     - name: Checkout repository
-  #       uses: actions/checkout@v4
-  #     - run: |
-  #         python examples/nlp/language_modeling/megatron_gpt_ptq.py \
-  #             model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
-  #             model.tensor_model_parallel_size=1 \
-  #             trainer.devices=1 \
-  #             quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
-  #             quantization.algorithm=int4_awq \
-  #             quantization.num_calib_size=8 \
-  #             inference.batch_size=2 \
-  #             export.save_path=/home/TestData/nlp/megatron_llama/ci_int4_awq.qnemo
-  #
-  #         rm -rf /home/TestData/nlp/megatron_llama/ci_int4_awq.qnemo
-  #     - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
-  #       if: "failure()"
+      AFTER_SCRIPT: |
+        rm -rf /tmp/nlp_megatron_llama_eo/ci_fp8.qnemo

   # OPTIONAL_L2_QAT_Llama2_INT4:
   #   needs: [cicd-test-container-setup]
@@ -4477,10 +4423,8 @@ jobs:
       - L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2
       - L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED
       - L2_NeMo_2_Mixtral_Pretraining
-      - L2_PTQ_Llama2_INT8_SQ
       - L2_PTQ_Llama2_FP8
       - L2_Community_LLM_Checkpoints_tests_Llama3
-      - L2_PTQ_Llama2_Export_Only
       - L2_Distill_Llama2
       - L2_Prune_Width_Llama2
       - L2_Speech_to_Text_AED
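
For reference, a minimal sketch of what the reworked L2_PTQ_Llama2_FP8 job executes if run by hand. Assumptions: a machine with two GPUs and the same /home/TestData volume the self-hosted-azure runners mount; the quantization/export flags that fall between the two hunks above are not visible in this diff and are therefore omitted here.

    # Step 1 (new in this commit): convert the tiny HF Llama checkpoint to
    # .nemo on a single GPU, writing the result under /tmp.
    CUDA_VISIBLE_DEVICES=0 python scripts/checkpoint_converters/convert_llama_hf_to_nemo.py \
        --input_name_or_path=/home/TestData/nlp/megatron_llama/llama-ci-hf-tiny \
        --output_path=/tmp/nlp_megatron_llama/llama_ci.nemo \
        --precision=16

    # Step 2: post-training quantization against the freshly converted
    # checkpoint in /tmp rather than the pre-baked one in /home/TestData.
    # (Flags elided between the diff hunks are omitted.)
    python examples/nlp/language_modeling/megatron_gpt_ptq.py \
        model.restore_from_path=/tmp/nlp_megatron_llama/llama_ci.nemo \
        model.tensor_model_parallel_size=2 \
        trainer.devices=2 \
        quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
        export.sample_output=False \
        export.save_path=/tmp/nlp_megatron_llama_eo/ci_fp8.qnemo

    # Cleanup, mirroring the new AFTER_SCRIPT block:
    rm -rf /tmp/nlp_megatron_llama_eo/ci_fp8.qnemo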