ci: Add coverage reports #11

Workflow file for this run

# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: "CICD NeMo"
on:
pull_request:
branches:
- 'main'
- 'r**'
types: [ labeled ]
workflow_dispatch:
inputs:
test_to_run:
required: false
default: all
type: string
description: Comma-separated list of tests to run. Use "all" to run the full test suite.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
pre-flight:
runs-on: ubuntu-latest
outputs:
test_to_run: ${{ steps.test_to_run.outputs.main }}
all: ${{ steps.all.outputs.main }}
event_name: ${{ steps.github-event.outputs.main }}
steps:
- name: Parse test_to_run
id: test_to_run
run: |
parsed_string=$(echo ${{ inputs.test_to_run || 'all' }} | jq -c --raw-input 'split(",")')
echo "main=${parsed_string}" | tee -a "$GITHUB_OUTPUT"
- name: Parse all
id: all
run: |
echo "main=${{ contains(fromJSON(steps.test_to_run.outputs.main), 'all') }}" | tee -a "$GITHUB_OUTPUT"
- name: Infer github event
id: github-event
run: |
echo "main=${{ github.event_name }}" | tee -a "$GITHUB_OUTPUT"
  cicd-test-container-build:
    if: ${{ github.event.label.name == 'Run CICD' || needs.pre-flight.outputs.event_name == 'workflow_dispatch' }}
    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/[email protected]
    needs: pre-flight
    with:
      image-name: nemo_container
      dockerfile: Dockerfile.ci
      image-label: nemo-core
      build-args: |
        IMAGE_LABEL=nemo-core
      prune-filter-timerange: 24h
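
  # The reusable template referenced above is expected to build the CI image from Dockerfile.ci and
  # presumably push it to the internal registry; downstream jobs pull the same image back as
  # nemoci.azurecr.io/nemo_container:${{ github.run_id }}, so every job in this run tests one build.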
  cicd-test-container-setup:
    needs: [cicd-test-container-build, pre-flight]
    runs-on: self-hosted-azure-builder
    if: ${{ github.event.label.name == 'Run CICD' || github.event_name == 'workflow_dispatch' }}
    outputs:
      test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
      all: ${{ needs.pre-flight.outputs.all }}
    steps:
      - name: Run some checks
        run: |
          docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --env PYTHONUNBUFFERED=1 nemoci.azurecr.io/nemo_container:${{ github.run_id }} bash -c '\
            # PyTorch Lightning version
            python -c "import lightning.pytorch; print(lightning.pytorch.__version__)"
            # PyTorch Lightning DDP Checks
            CUDA_VISIBLE_DEVICES="0,1" python "tests/core_ptl/check_for_ranks.py"
            # Basic Import Checks
            python -c "import nemo.collections.asr as nemo_asr"
            python -c "import nemo.collections.nlp as nemo_nlp"
            python -c "import nemo.collections.nlp as nemo_nlp; nemo_nlp.modules.get_tokenizer_list()"
            python -c "import nemo.collections.tts as nemo_tts"
            python setup.py style
            python tests/check_copyright_header.py --dir .
            # These checks are not crucial
            exit 0
          '

  ### \'\'

  # L0: GPU unit tests
  L0_Unit_Tests_GPU_ASR:
    needs: [cicd-test-container-setup]
    uses: ./.github/workflows/_test_template.yml
    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_ASR') || needs.cicd-test-container-setup.outputs.all == 'true'
    with:
      RUNNER: self-hosted-azure
      TIMEOUT: 20
      # TODO: remove this hack
      SCRIPT: |
        python -c "from nemo.collections.asr.models import ASRModel" && NEMO_NUMBA_MINVER=0.53 pytest tests/collections/asr -m "not pleasefixme" --cov-report=term --cov=nemo/collections/asr --with_downloads
# L0_Unit_Tests_GPU_Audio:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_Audio') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# TIMEOUT: 20
# SCRIPT: |
# NEMO_NUMBA_MINVER=0.53 pytest tests/collections/audio -m "not pleasefixme" --with_downloads
# L0_Unit_Tests_GPU_Common:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_Common') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# NEMO_NUMBA_MINVER=0.53 pytest tests/collections/common -m "not pleasefixme" --with_downloads
# L0_Unit_Tests_GPU_LLM:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_LLM') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# NEMO_NUMBA_MINVER=0.53 pytest tests/collections/llm -m "not pleasefixme" --with_downloads
# L0_Unit_Tests_GPU_Multimodal:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_Multimodal') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# NEMO_NUMBA_MINVER=0.53 pytest tests/collections/multimodal -m "not pleasefixme" --with_downloads
# L0_Unit_Tests_GPU_NLP:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_NLP') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# NEMO_NUMBA_MINVER=0.53 pytest tests/collections/nlp -m "not pleasefixme" --with_downloads
# L0_Unit_Tests_GPU_TTS:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_TTS') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# NEMO_NUMBA_MINVER=0.53 pytest tests/collections/tts -m "not pleasefixme" --with_downloads
# OPTIONAL_L0_Unit_Tests_GPU_Core:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L0_Unit_Tests_GPU_Core') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# TIMEOUT: 20
# SCRIPT: |
# NEMO_NUMBA_MINVER=0.53 pytest tests/core -m "not pleasefixme" --with_downloads
# IS_OPTIONAL: true
# L0_Unit_Tests_GPU_Hydra:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_Hydra') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# NEMO_NUMBA_MINVER=0.53 pytest tests/hydra -m "not pleasefixme" --with_downloads
# L0_Unit_Tests_GPU_Lightning:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_Lightning') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# NEMO_NUMBA_MINVER=0.53 pytest tests/lightning -m "not pleasefixme" --with_downloads
# L0_Unit_Tests_GPU_Others:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_GPU_Others') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# NEMO_NUMBA_MINVER=0.53 pytest -m "not pleasefixme" --with_downloads \
# --ignore=tests/collections/asr \
# --ignore=tests/collections/audio \
# --ignore=tests/collections/common \
# --ignore=tests/collections/llm \
# --ignore=tests/collections/multimodal \
# --ignore=tests/collections/nlp \
# --ignore=tests/collections/tts \
# --ignore=tests/core \
# --ignore=tests/core_ptl \
# --ignore=tests/hydra \
# --ignore=tests/lightning \
# --ignore=tests/utils

  # L0: CPU unit tests
  L0_Unit_Tests_CPU_ASR:
    needs: [cicd-test-container-setup]
    uses: ./.github/workflows/_test_template.yml
    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_ASR') || needs.cicd-test-container-setup.outputs.all == 'true'
    with:
      RUNNER: self-hosted-azure-cpu
      TIMEOUT: 20
      SCRIPT: |
        CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/asr -m "not pleasefixme" --cpu --with_downloads --cov-report=term --cov=nemo/collections/asr --relax_numba_compat
  L0_Unit_Tests_CPU_Audio:
    needs: [cicd-test-container-setup]
    uses: ./.github/workflows/_test_template.yml
    if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_Audio') || needs.cicd-test-container-setup.outputs.all == 'true'
    with:
      RUNNER: self-hosted-azure-cpu
      SCRIPT: |
        CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/audio -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat

  COVERAGE:
    runs-on: self-hosted-azure-cpu
    container: nemoci.azurecr.io/nemo_container:${{ github.run_id }}
    needs:
      - L0_Unit_Tests_CPU_ASR
      - L0_Unit_Tests_CPU_Audio
    steps:
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: ${{ github.run_id }}
      - name: Combine
        shell: bash -x -e -u -o pipefail {0}
        run: |
          pip install coverage
          cd ${{ github.run_id }}
          ls -al .
          ls -al coverage-*/
          coverage combine --keep $(ls coverage-*/.coverage)
          coverage report
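
  # How the combine step works: `actions/download-artifact@v4` with no `name` fetches every artifact
  # of this run into ${{ github.run_id }}/<artifact-name>/, `coverage combine --keep` merges the
  # per-job data files into a single .coverage file (keeping the inputs), and `coverage report` prints
  # the merged line-coverage table. A rough local equivalent, assuming two downloaded artifact folders:
  #   pip install coverage
  #   coverage combine --keep coverage-asr/.coverage coverage-audio/.coverage
  #   coverage report
  # (the folder names `coverage-asr`/`coverage-audio` are hypothetical; the job only relies on the
  # `coverage-*` glob.)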
# L0_Unit_Tests_CPU_Common:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_Common') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-cpu
# TIMEOUT: 20
# SCRIPT: |
# CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/common -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
# L0_Unit_Tests_CPU_LLM:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_LLM') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-cpu
# SCRIPT: |
# CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/llm -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
# L0_Unit_Tests_CPU_Multimodal:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_Multimodal') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-cpu
# SCRIPT: |
# CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/multimodal -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
# L0_Unit_Tests_CPU_NLP:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_NLP') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# TIMEOUT: 20
# SCRIPT: |
# CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/nlp -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
# L0_Unit_Tests_CPU_TTS:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_TTS') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-cpu
# SCRIPT: |
# CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/collections/tts -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
# L0_Unit_Tests_CPU_Core:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_Core') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-cpu
# TIMEOUT: 20
# SCRIPT: |
# CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/core tests/core_ptl -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
# L0_Unit_Tests_CPU_Hydra:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_Hydra') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-cpu
# SCRIPT: |
# CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/hydra -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
# L0_Unit_Tests_CPU_Lightning:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_Lightning') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-cpu
# SCRIPT: |
# CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest tests/lightning -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat
# L0_Unit_Tests_CPU_Others:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Unit_Tests_CPU_Others') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-cpu
# SCRIPT: |
# CUDA_VISIBLE_DEVICES="" NEMO_NUMBA_MINVER=0.53 pytest -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat \
# --ignore=tests/collections/asr \
# --ignore=tests/collections/audio \
# --ignore=tests/collections/common \
# --ignore=tests/collections/llm \
# --ignore=tests/collections/multimodal \
# --ignore=tests/collections/nlp \
# --ignore=tests/collections/tts \
# --ignore=tests/core \
# --ignore=tests/core_ptl \
# --ignore=tests/hydra \
# --ignore=tests/lightning \
# --ignore=tests/utils
# L0_Setup_Test_Data_And_Models:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L0_Setup_Test_Data_And_Models') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python -m tests.setup --save_dir /home/TestData/nlp
# # - name: L2: Multimodal Imagen Train
# # L2: Community LLM Checkpoints tests
# L2_Community_LLM_Checkpoints_tests_Bert:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Bert') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python scripts/checkpoint_converters/convert_bert_hf_to_nemo.py \
# --input_name_or_path /home/TestData/nlp/megatron_ir/sbert/hf_model/bert-base-uncased \
# --output_path /tmp/nlp_megatron_ir_sbert/sbert.nemo
# L2_Community_LLM_Checkpoints_tests_Mamba2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Mamba2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python scripts/checkpoint_converters/convert_mamba2_pyt_to_nemo.py \
# --input_name_or_path /home/TestData/nlp/megatron_mamba/model_optim_rng.pt \
# --output_path /tmp/nlp_megatron_mamba/converted_mamba.nemo \
# --precision=bf16 \
# --mamba_ssm_ngroups 1
# L2_Community_LLM_Checkpoints_tests_Llama:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Llama') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# CUDA_VISIBLE_DEVICES=0 python scripts/checkpoint_converters/convert_llama_hf_to_nemo.py \
# --input_name_or_path=/home/TestData/nlp/megatron_llama/llama-ci-hf-tiny \
# --output_path=/tmp/nlp_megatron_llama/llama_ci.nemo \
# --precision=16
# L2_Community_LLM_Checkpoints_tests_Llama3:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Llama3') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# CUDA_VISIBLE_DEVICES=0 python scripts/checkpoint_converters/convert_llama_hf_to_nemo.py \
# --input_name_or_path=/home/TestData/nlp/megatron_llama/llama3-ci-hf \
# --output_path=/tmp/nlp_megatron_llama_llama3-ci-hf/llama3_ci.nemo \
# --precision=16
# L2_Community_LLM_Checkpoints_tests_StarCoder:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_StarCoder') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# mkdir -p /tmp/nlp_megatron_gpt_starcoder-ci-hf/
# python scripts/checkpoint_converters/convert_starcoder_hf_to_nemo.py \
# --input_name_or_path /home/TestData/nlp/megatron_gpt/starcoder-ci-hf \
# --output_path /tmp/nlp_megatron_gpt_starcoder-ci-hf/
# L2_Community_LLM_Checkpoints_tests_Falcon:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Community_LLM_Checkpoints_tests_Falcon') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python scripts/checkpoint_converters/convert_falcon_hf_to_nemo.py \
# --input_name_or_path /home/TestData/nlp/megatron_gpt/falcon-ci-hf \
# --output_path /tmp/nlp_megatron_gpt_falcon-ci-hf/falcon_ci.nemo
# # L2: Community llava multimodal Checkpoints tests
# L2_Community_vita_Checkpoints_tests_Llama3:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Community_vita_Checkpoints_tests_Llama3') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# mkdir /tmp/${{ github.run_id }}
# export PYTHONPATH=/home/TestData/multimodal/video_neva/LLaVA:$PYTHONPATH
# CUDA_VISIBLE_DEVICES=0 python examples/multimodal/multimodal_llm/neva/convert_llava_to_neva.py \
# --in-file /home/TestData/multimodal/video_neva/Llama-3-VILA1.5-8B/llm \
# --mm-projector-ckpt-dir /home/TestData/multimodal/video_neva/Llama-3-VILA1.5-8B/mm_projector \
# --mm-vision-tower /home/TestData/multimodal/video_neva/Llama-3-VILA1.5-8B/vision_tower \
# --tokenizer-model /home/TestData/multimodal/video_neva/vita-tokenizer/ \
# --config-file vita_config.yaml \
# --out-file=/tmp/${{ github.run_id }}/llama3_ci.nemo \
# --model-type VITA \
# --conv-template llama_3
# # this test is using a 7B model which is too large for GitHub CI
# # replace the model in this test with a toy model or move the test
# # to the nightly CI
# # OPTIONAL_L2_Community_LLM_Checkpoints_tests_Baichuan2:
# # needs: [cicd-test-container-setup]
# # runs-on: self-hosted-azure
# # container:
# # image: nemoci.azurecr.io/nemo_container:${{ github.run_id }}
# # options:
# # # --user 0:128
# # --device=/dev/nvidia0
# # --gpus all
# # --shm-size=8g
# # --env TRANSFORMERS_OFFLINE=0
# # --env HYDRA_FULL_ERROR=1
# # --volume /mnt/datadrive/TestData:/home/TestData
# # steps:
# # - name: Checkout repository
# # uses: actions/checkout@v4
# # - run: |
# # python scripts/checkpoint_converters/convert_baichuan2_hf_to_nemo.py \
# # --input_name_or_path=/home/TestData/nlp/megatron_gpt/Baichuan2-7B-Base \
# # --output_path=/home/TestData/nlp/megatron_gpt/Baichuan2-7B-Base/ci.nemo
# # rm -f /home/TestData/nlp/megatron_gpt/Baichuan2-7B-Base/ci.nemo
# # - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
# # if: "failure()"
# L2_PTQ_Llama2_FP8:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_PTQ_Llama2_FP8') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# CUDA_VISIBLE_DEVICES=0 python scripts/checkpoint_converters/convert_llama_hf_to_nemo.py \
# --input_name_or_path=/home/TestData/nlp/megatron_llama/llama-ci-hf-tiny \
# --output_path=/tmp/nlp_megatron_llama/llama_ci.nemo \
# --precision=16
# python examples/nlp/language_modeling/megatron_gpt_ptq.py \
# model.restore_from_path=/tmp/nlp_megatron_llama/llama_ci.nemo \
# model.tensor_model_parallel_size=2 \
# trainer.devices=2 \
# quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
# quantization.algorithm=fp8 \
# quantization.num_calib_size=8 \
# inference.batch_size=2 \
# export.inference_tensor_parallel=2 \
# export.sample_output=False \
# export.save_path=/tmp/nlp_megatron_llama_eo/ci_fp8.qnemo
# AFTER_SCRIPT: |
# rm -rf /tmp/nlp_megatron_llama_eo/ci_fp8.qnemo
# # OPTIONAL_L2_QAT_Llama2_INT4:
# # needs: [cicd-test-container-setup]
# # runs-on: self-hosted-azure
# # timeout-minutes: 10
# # container:
# # image: nemoci.azurecr.io/nemo_container:${{ github.run_id }}
# # options:
# # # --user 0:128
# # --device=/dev/nvidia0
# # --gpus all
# # --shm-size=8g
# # --env TRANSFORMERS_OFFLINE=0
# # --env HYDRA_FULL_ERROR=1
# # --volume /mnt/datadrive/TestData:/home/TestData
# # steps:
# # - name: Checkout repository
# # uses: actions/checkout@v4
# # - run: |
# # python examples/nlp/language_modeling/tuning/megatron_gpt_qat.py \
# # quantization.algorithm=int4 \
# # quantization.num_calib_size=8 \
# # trainer.devices=1 \
# # trainer.num_nodes=1 \
# # trainer.max_steps=4 \
# # trainer.val_check_interval=4 \
# # +trainer.limit_val_batches=2 \
# # exp_manager.explicit_log_dir=llama2_qat_results \
# # model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
# # model.tensor_model_parallel_size=1 \
# # model.pipeline_model_parallel_size=1 \
# # model.global_batch_size=2 \
# # model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# # model.data.train_ds.concat_sampling_probabilities=[1.0] \
# # model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl]
# # rm -rf llama2_qat_results
# L2_Distill_Llama2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Distill_Llama2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_gpt_distillation.py \
# trainer.devices=2 \
# trainer.num_nodes=1 \
# trainer.precision=bf16 \
# trainer.max_steps=5 \
# trainer.log_every_n_steps=5 \
# trainer.val_check_interval=5 \
# trainer.limit_val_batches=2 \
# model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
# model.kd_teacher_restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
# model.tensor_model_parallel_size=2 \
# model.pipeline_model_parallel_size=1 \
# model.micro_batch_size=1 \
# model.global_batch_size=4 \
# model.optim.name=distributed_fused_adam \
# model.optim.sched.warmup_steps=1 \
# model.data.data_prefix=[1.0,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings \
# exp_manager.exp_dir=/tmp/megatron_llama_distill
# L2_Prune_Width_Llama2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Prune_Width_Llama2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_gpt_prune.py \
# trainer.devices=2 \
# trainer.num_nodes=1 \
# trainer.precision=bf16 \
# model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
# model.tensor_model_parallel_size=1 \
# model.pipeline_model_parallel_size=2 \
# prune.num_calib_size=8 \
# prune.ffn_hidden_size=192 \
# prune.num_attention_heads=2 \
# prune.num_query_groups=2 \
# prune.hidden_size=128 \
# export.save_path=/tmp/ci_prune_width.nemo
# L2_Prune_Depth_Llama2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Prune_Depth_Llama2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_gpt_prune.py \
# trainer.devices=2 \
# trainer.num_nodes=1 \
# trainer.precision=bf16 \
# model.restore_from_path=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
# model.tensor_model_parallel_size=2 \
# model.pipeline_model_parallel_size=1 \
# 'prune.drop_layers=[1]' \
# export.save_path=/tmp/ci_prune_depth.nemo
# # L2: ASR dev run
# ASR_dev_run_Speech_to_Text:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'ASR_dev_run_Speech_to_Text') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/asr/asr_ctc/speech_to_text_ctc.py \
# model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
# model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# exp_manager.exp_dir=/tmp/speech_to_text_results
# ASR_dev_run_Speech_to_Text_WPE_-_CitriNet:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'ASR_dev_run_Speech_to_Text_WPE_-_CitriNet') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/asr/asr_ctc/speech_to_text_ctc_bpe.py \
# --config-path="../conf/citrinet/" --config-name="config_bpe" \
# model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
# model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
# model.tokenizer.dir="/home/TestData/asr_tokenizers/an4_wpe_128/" \
# model.tokenizer.type="wpe" \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# exp_manager.exp_dir=/tmp/speech_to_text_wpe_results
# ASR_dev_run_Speech_Pre-training_-_CitriNet:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'ASR_dev_run_Speech_Pre-training_-_CitriNet') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/asr/speech_pretraining/speech_pre_training.py \
# --config-path="../conf/ssl/citrinet/" --config-name="citrinet_ssl_ci" \
# model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
# model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# exp_manager.exp_dir=/tmp/speech_pre_training_results
# ASR_dev_run_Speech_To_Text_Finetuning:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'ASR_dev_run_Speech_To_Text_Finetuning') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/asr/speech_to_text_finetune.py \
# --config-path="conf/asr_finetune" --config-name="speech_to_text_finetune" \
# model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
# model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
# init_from_nemo_model=/home/TestData/asr/stt_en_fastconformer_transducer_large.nemo \
# model.tokenizer.update_tokenizer=False \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# exp_manager.exp_dir=/tmp/speech_finetuning_results
# ASR_dev_run_Speech_To_Text_HF_Finetuning:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'ASR_dev_run_Speech_To_Text_HF_Finetuning') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |-
# python examples/asr/speech_to_text_finetune.py \
# --config-path="conf/asr_finetune" --config-name="speech_to_text_hf_finetune" \
# ~model.train_ds.hf_data_cfg \
# model.train_ds.num_workers=1 \
# model.train_ds.batch_size=2 model.validation_ds.batch_size=2 \
# model.train_ds.streaming=true \
# +model.train_ds.hf_data_cfg.path="librispeech_asr" \
# +model.train_ds.hf_data_cfg.name=null \
# +model.train_ds.hf_data_cfg.split="test.clean" \
# +model.train_ds.hf_data_cfg.streaming=true \
# +model.train_ds.hf_data_cfg.trust_remote_code=True \
# ~model.validation_ds.hf_data_cfg \
# model.validation_ds.streaming=true \
# +model.validation_ds.hf_data_cfg.path="librispeech_asr" \
# +model.validation_ds.hf_data_cfg.name=null \
# +model.validation_ds.hf_data_cfg.split="test.clean" \
# +model.validation_ds.hf_data_cfg.streaming=true \
# +model.validation_ds.hf_data_cfg.trust_remote_code=True \
# ~model.test_ds \
# init_from_nemo_model=/home/TestData/asr/stt_en_fastconformer_transducer_large.nemo \
# model.tokenizer.update_tokenizer=False \
# model.optim.sched.warmup_steps=0 \
# +model.optim.sched.max_steps=3 \
# trainer.max_epochs=null \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# exp_manager.exp_dir=/tmp/speech_finetuning_results
# ASR_dev_run_Speech_to_Text_WPE_-_Conformer:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'ASR_dev_run_Speech_to_Text_WPE_-_Conformer') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/asr/asr_ctc/speech_to_text_ctc_bpe.py \
# --config-path="../conf/conformer" --config-name="conformer_ctc_bpe" \
# model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
# model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
# model.tokenizer.dir="/home/TestData/asr_tokenizers/an4_wpe_128/" \
# model.tokenizer.type="wpe" \
# model.train_ds.batch_size=4 \
# model.validation_ds.batch_size=4 \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# exp_manager.exp_dir=/tmp/speech_to_text_wpe_conformer_results
# # L2: ASR dev run - part two
# ASR_dev_run-part_two_Speech_to_Text_WPE_-_Squeezeformer:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'ASR_dev_run-part_two_Speech_to_Text_WPE_-_Squeezeformer') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/asr/asr_ctc/speech_to_text_ctc_bpe.py \
# --config-path="../conf/squeezeformer" --config-name="squeezeformer_ctc_bpe" \
# model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
# model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
# model.tokenizer.dir="/home/TestData/asr_tokenizers/an4_wpe_128/" \
# model.tokenizer.type="wpe" \
# model.encoder.d_model=144 \
# model.train_ds.batch_size=4 \
# model.validation_ds.batch_size=4 \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# exp_manager.exp_dir=/tmp/speech_to_text_wpe_squeezeformer_results
# L2_Speech_to_Text_EMA:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speech_to_Text_EMA') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/asr/asr_ctc/speech_to_text_ctc.py \
# model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
# model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
# trainer.devices=2 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# +exp_manager.ema.enable=True \
# exp_manager.exp_dir=/tmp/speech_to_text_results
# L2_Speech_to_Text_AED:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speech_to_Text_AED') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/asr/speech_multitask/speech_to_text_aed.py \
# model.prompt_format=canary \
# model.model_defaults.asr_enc_hidden=256 \
# model.model_defaults.lm_dec_hidden=256 \
# model.encoder.n_layers=12 \
# model.transf_encoder.num_layers=0 \
# model.transf_decoder.config_dict.num_layers=12 \
# model.train_ds.manifest_filepath=/home/TestData/asr/manifests/canary/an4_canary_train.json \
# model.train_ds.batch_duration=60 \
# model.train_ds.use_bucketing=false \
# model.train_ds.shuffle_buffer_size=100 \
# model.train_ds.num_workers=0 \
# +model.train_ds.text_field="answer" \
# +model.train_ds.lang_field="target_lang" \
# model.validation_ds.manifest_filepath=/home/TestData/asr/manifests/canary/an4_canary_val.json \
# +model.validation_ds.text_field="answer" \
# +model.validation_ds.lang_field="target_lang" \
# model.validation_ds.num_workers=0 \
# model.test_ds.manifest_filepath=/home/TestData/asr/manifests/canary/an4_canary_val.json \
# +model.test_ds.text_field="answer" \
# +model.test_ds.lang_field="target_lang" \
# model.test_ds.num_workers=0 \
# spl_tokens.model_dir=/home/TestData/asr_tokenizers/canary/canary_spl_tokenizer_v32 \
# model.tokenizer.langs.en.dir=/home/TestData/asr_tokenizers/canary/en/tokenizer_spe_bpe_v1024_max_4 \
# model.tokenizer.langs.en.type=bpe \
# ++model.tokenizer.langs.es.dir=/home/TestData/asr_tokenizers/canary/es/tokenizer_spe_bpe_v1024_max_4 \
# ++model.tokenizer.langs.es.type=bpe \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# exp_manager.exp_dir=/tmp/speech_to_text_aed_results
# # L2: Speaker dev run
# L2_Speaker_dev_run_Speaker_Recognition:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speaker_dev_run_Speaker_Recognition') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/speaker_tasks/recognition/speaker_reco.py \
# model.train_ds.batch_size=10 \
# model.validation_ds.batch_size=2 \
# model.train_ds.manifest_filepath=/home/TestData/an4_speaker/train.json \
# model.validation_ds.manifest_filepath=/home/TestData/an4_speaker/dev.json \
# model.decoder.num_classes=2 \
# trainer.max_epochs=10 \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# exp_manager.exp_dir=/tmp/speaker_recognition_results
# L2_Speaker_dev_run_Speaker_Diarization:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speaker_dev_run_Speaker_Diarization') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/speaker_tasks/diarization/neural_diarizer/multiscale_diar_decoder.py \
# model.diarizer.speaker_embeddings.model_path=titanet_large \
# model.train_ds.batch_size=5 \
# model.validation_ds.batch_size=5 \
# model.train_ds.emb_dir=examples/speaker_tasks/diarization/speaker_diarization_results \
# model.validation_ds.emb_dir=examples/speaker_tasks/diarization/speaker_diarization_results \
# model.train_ds.manifest_filepath=/home/TestData/an4_diarizer/simulated_train/msdd_data.50step.json \
# model.validation_ds.manifest_filepath=/home/TestData/an4_diarizer/simulated_valid/msdd_data.50step.json \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# exp_manager.exp_dir=/tmp/speaker_diarization_results
# L2_Speaker_dev_run_EndtoEnd_Speaker_Diarization_Sortformer:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speaker_dev_run_EndtoEnd_Speaker_Diarization_Sortformer') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/speaker_tasks/diarization/neural_diarizer/sortformer_diar_train.py \
# trainer.devices="[0]" \
# batch_size=3 \
# model.train_ds.manifest_filepath=/home/TestData/an4_diarizer/simulated_train/eesd_train_tiny.json \
# model.validation_ds.manifest_filepath=/home/TestData/an4_diarizer/simulated_valid/eesd_valid_tiny.json \
# exp_manager.exp_dir=/tmp/speaker_diarization_results \
# +trainer.fast_dev_run=True
# L2_Speaker_dev_run_EndtoEnd_Diarizer_Inference:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speaker_dev_run_EndtoEnd_Diarizer_Inference') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/speaker_tasks/diarization/neural_diarizer/e2e_diarize_speech.py \
# model_path=/home/TestData/an4_diarizer/diar_sortformer_4spk-v1-tiny.nemo \
# dataset_manifest=/home/TestData/an4_diarizer/simulated_valid/eesd_valid_tiny.json \
# batch_size=1
# L2_Speaker_dev_run_Speech_to_Label:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speaker_dev_run_Speech_to_Label') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/asr/speech_classification/speech_to_label.py \
# model.train_ds.manifest_filepath=/home/TestData/speech_commands/train_manifest.json \
# model.validation_ds.manifest_filepath=/home/TestData/speech_commands/test_manifest.json \
# model.test_ds.manifest_filepath=/home/TestData/speech_commands/test_manifest.json \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# model.preprocessor._target_=nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor \
# ~model.preprocessor.window_size \
# ~model.preprocessor.window_stride \
# ~model.preprocessor.window \
# ~model.preprocessor.n_mels \
# ~model.preprocessor.n_mfcc \
# ~model.preprocessor.n_fft \
# exp_manager.exp_dir=/tmp/speech_to_label_results
# L2_Speaker_dev_run_Speaker_Diarization_with_ASR_Inference:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speaker_dev_run_Speaker_Diarization_with_ASR_Inference') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/speaker_tasks/diarization/clustering_diarizer/offline_diar_with_asr_infer.py \
# diarizer.manifest_filepath=/home/TestData/an4_diarizer/an4_manifest.json \
# diarizer.speaker_embeddings.model_path=/home/TestData/an4_diarizer/spkr.nemo \
# diarizer.speaker_embeddings.parameters.save_embeddings=True \
# diarizer.speaker_embeddings.parameters.window_length_in_sec=[1.5] \
# diarizer.speaker_embeddings.parameters.shift_length_in_sec=[0.75] \
# diarizer.speaker_embeddings.parameters.multiscale_weights=[1.0] \
# diarizer.asr.model_path=QuartzNet15x5Base-En \
# diarizer.asr.parameters.asr_based_vad=True \
# diarizer.out_dir=/tmp/speaker_diarization_asr_results
# L2_Speaker_dev_run_Clustering_Diarizer_Inference:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speaker_dev_run_Clustering_Diarizer_Inference') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/speaker_tasks/diarization/clustering_diarizer/offline_diar_infer.py \
# diarizer.manifest_filepath=/home/TestData/an4_diarizer/an4_manifest.json \
# diarizer.speaker_embeddings.model_path=/home/TestData/an4_diarizer/spkr.nemo \
# diarizer.speaker_embeddings.parameters.save_embeddings=True \
# diarizer.speaker_embeddings.parameters.window_length_in_sec=1.5 \
# diarizer.speaker_embeddings.parameters.shift_length_in_sec=0.75 \
# diarizer.speaker_embeddings.parameters.multiscale_weights=null \
# diarizer.vad.model_path=/home/TestData/an4_diarizer/MatchboxNet_VAD_3x2.nemo \
# diarizer.out_dir=/tmp/clustering_diarizer_results
# L2_Speaker_dev_run_Neural_Diarizer_Inference:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speaker_dev_run_Neural_Diarizer_Inference') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/speaker_tasks/diarization/neural_diarizer/multiscale_diar_decoder_infer.py \
# diarizer.manifest_filepath=/home/TestData/an4_diarizer/an4_manifest.json \
# diarizer.msdd_model.model_path=/home/TestData/an4_diarizer/diar_msdd_telephonic.nemo \
# diarizer.speaker_embeddings.parameters.save_embeddings=True \
# diarizer.vad.model_path=/home/TestData/an4_diarizer/MatchboxNet_VAD_3x2.nemo \
# diarizer.out_dir=/tmp/neural_diarizer_results
# L2_Speaker_dev_run_Multispeaker_ASR_Data_Simulation:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speaker_dev_run_Multispeaker_ASR_Data_Simulation') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tools/speech_data_simulator/multispeaker_simulator.py \
# --config-path=conf --config-name=data_simulator.yaml \
# data_simulator.random_seed=42 \
# data_simulator.manifest_filepath=/home/TestData/LibriSpeechShort/dev-clean-align-short.json \
# data_simulator.outputs.output_dir=/tmp/test_simulator \
# data_simulator.session_config.num_sessions=2 \
# data_simulator.session_config.session_length=60
# # L2: ASR Multi-dataloader dev run
# L2_ASR_Multi-dataloader_dev_run_Speech_to_Text_multi-dataloader:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_ASR_Multi-dataloader_dev_run_Speech_to_Text_multi-dataloader') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/asr/asr_ctc/speech_to_text_ctc.py \
# model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
# model.validation_ds.manifest_filepath=[/home/TestData/an4_dataset/an4_val.json,/home/TestData/an4_dataset/an4_val.json] \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# trainer.max_epochs=1 \
# trainer.max_steps=1 \
# +trainer.num_sanity_val_steps=1 \
# exp_manager.exp_dir=/tmp/speech_to_text_results
# L2_ASR_Multi-dataloader_dev_run_Speech_to_Label_multi-dataloader:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_ASR_Multi-dataloader_dev_run_Speech_to_Label_multi-dataloader') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/asr/speech_classification/speech_to_label.py \
# model.train_ds.manifest_filepath=/home/TestData/speech_commands/train_manifest.json \
# model.validation_ds.manifest_filepath=[/home/TestData/speech_commands/test_manifest.json,/home/TestData/speech_commands/test_manifest.json] \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# trainer.max_epochs=1 \
# trainer.max_steps=1 \
# +trainer.num_sanity_val_steps=1 \
# model.preprocessor._target_=nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor \
# ~model.preprocessor.window_size \
# ~model.preprocessor.window_stride \
# ~model.preprocessor.window \
# ~model.preprocessor.n_mels \
# ~model.preprocessor.n_mfcc \
# ~model.preprocessor.n_fft \
# exp_manager.exp_dir=/tmp/speech_to_label_results
# # L2: ASR Adapters
# L2_ASR_Adapters_Linear_Adapters:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_ASR_Adapters_Linear_Adapters') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/asr/asr_adapters/train_asr_adapter.py \
# model.pretrained_model="stt_en_conformer_ctc_small" \
# model.adapter.adapter_name="an4" \
# model.adapter.linear.in_features=176 \
# model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
# model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
# trainer.max_steps=5 \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# exp_manager.exp_dir=/tmp/speech_to_text_adapters_results
# L2_ASR_Adapters_RelPos_MHA_Adapters:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_ASR_Adapters_RelPos_MHA_Adapters') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/asr/asr_adapters/train_asr_adapter.py \
# model.pretrained_model="stt_en_conformer_ctc_small" \
# model.adapter.adapter_name="encoder:an4" \
# model.adapter.adapter_type="tiny_attn" \
# model.adapter.tiny_attn.n_feat=176 \
# model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
# model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
# trainer.max_steps=5 \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=True \
# exp_manager.exp_dir=/tmp/speech_to_text_adapters_mha_results
# # L2: OOMptimizer
# L2_Speech_Estimate_Duration_Bins:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speech_Estimate_Duration_Bins') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# set -x
# # 1D buckets [SSL, CTC]
# python scripts/speech_recognition/estimate_duration_bins.py \
# /home/TestData/an4_dataset/an4_train.json \
# --buckets 5
# # 2D buckets [CTC, RNNT, TDT] / with tokenizer
# python scripts/speech_recognition/estimate_duration_bins_2d.py \
# /home/TestData/an4_dataset/an4_train_lang.json \
# --tokenizer /home/TestData/asr_tokenizers/canary/en/tokenizer_spe_bpe_v1024_max_4/tokenizer.model \
# --buckets 5 \
# --sub-buckets 2
# # TODO(pzelasko): Figure out how to quote the value in the test properly for CI to accept it...
# # 2D buckets with prompt [AED/Canary, SpeechLM] / with aggregate tokenizer + prompt format
# # python scripts/speech_recognition/estimate_duration_bins_2d.py \
# # /home/TestData/an4_dataset/an4_train_lang.json \
# # --tokenizer /home/TestData/asr_tokenizers/canary/canary_spl_tokenizer_v32/tokenizer.model \
# # /home/TestData/asr_tokenizers/canary/en/tokenizer_spe_bpe_v1024_max_4/tokenizer.model \
# # /home/TestData/asr_tokenizers/canary/es/tokenizer_spe_bpe_v1024_max_4/tokenizer.model \
# # --langs spl_tokens en es \
# # --prompt-format canary \
# # --prompt '[{"role":"user","slots":{"source_lang":"en","target_lang":"en","task":"asr","pnc":"yes"}}]' \
# # --buckets 5 \
# # --sub-buckets 2
# # L2: OOMptimizer
# L2_Speech_Batch_Size_OOMptimizer:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speech_Batch_Size_OOMptimizer') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# # 1D bucketing
# python scripts/speech_recognition/oomptimizer.py \
# -c /home/TestData/oomptimizer/fast-conformer_ctc_bpe.yaml \
# -m nemo.collections.asr.models.EncDecCTCModelBPE \
# -b "[5.0,10.0]"
# # 2D bucketing
# python scripts/speech_recognition/oomptimizer.py \
# -c /home/TestData/oomptimizer/fast-conformer_ctc_bpe.yaml \
# -m nemo.collections.asr.models.EncDecCTCModelBPE \
# -b "[[5.0,30],[5.0,45],[10.0,57],[10.0,71]]"
# # L2: OOMptimizer Canary (has a different batch schema)
# L2_Speech_Batch_Size_OOMptimizer_Canary:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speech_Batch_Size_OOMptimizer_Canary') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python scripts/speech_recognition/oomptimizer.py \
# -c /home/TestData/oomptimizer/fast-conformer_aed.yaml \
# -m nemo.collections.asr.models.EncDecMultiTaskModel \
# -b "[[5.0,30],[5.0,45],[10.0,57],[10.0,71]]"
# # L2: Speech Transcription
# L2_Speech_Transcription_Speech_to_Text_Transcribe:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speech_Transcription_Speech_to_Text_Transcribe') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/asr/transcribe_speech.py \
# pretrained_name="QuartzNet15x5Base-En" \
# audio_dir="/home/TestData/an4_transcribe/test_subset/" \
# output_filename="/tmp/stt_test_res.json" \
# amp=true
# # L2: Speech Transcription
# L2_Speech_Transcription_Canary_Transcribe_Full_Manifest:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speech_Transcription_Canary_Transcribe_Full_Manifest') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/asr/transcribe_speech.py \
# dataset_manifest=/home/TestData/asr/canary/dev-other-wav-10-canary-fields.json \
# output_filename=/tmp/preds.json \
# batch_size=10 \
# pretrained_name=nvidia/canary-1b \
# num_workers=0 \
# amp=false \
# compute_dtype=bfloat16 \
# matmul_precision=medium
# AFTER_SCRIPT: |
# rm -rf /tmp/preds.json transcribe.log
# L2_Speech_Transcription_Canary_Transcribe_With_Prompt:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speech_Transcription_Canary_Transcribe_With_Prompt') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/asr/transcribe_speech.py \
# dataset_manifest=/home/TestData/asr/canary/dev-other-wav-10.json \
# output_filename=preds.json \
# batch_size=10 \
# pretrained_name=nvidia/canary-1b \
# num_workers=0 \
# amp=false \
# compute_dtype=bfloat16 \
# matmul_precision=medium \
# +prompt.source_lang="en" \
# +prompt.target_lang="en" \
# +prompt.task="asr" \
# +prompt.pnc="no"
# AFTER_SCRIPT: |
# rm -rf preds.json transcribe.log
# L2_Speech_Transcription_Canary_Transcribe_Audio_Dir:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Speech_Transcription_Canary_Transcribe_Audio_Dir') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/asr/transcribe_speech.py \
# audio_dir=/home/TestData/asr/canary/dev-other-wav \
# output_filename=preds.json \
# batch_size=10 \
# pretrained_name=nvidia/canary-1b \
# num_workers=0 \
# amp=false \
# compute_dtype=bfloat16 \
# matmul_precision=medium
# AFTER_SCRIPT: |
# rm -rf preds.json
# # L2: Segmentation Tool
# L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# cd tools/ctc_segmentation && \
# TIME=`date +"%Y-%m-%d-%T"` && \
# /bin/bash run_segmentation.sh \
# --MODEL_NAME_OR_PATH="stt_en_citrinet_512_gamma_0_25" \
# --DATA_DIR=/home/TestData/ctc_segmentation/eng \
# --OUTPUT_DIR=/tmp/ctc_seg_en/output${TIME} \
# --LANGUAGE=en \
# --USE_NEMO_NORMALIZATION="TRUE" && \
# python /home/TestData/ctc_segmentation/verify_alignment.py \
# -r /home/TestData/ctc_segmentation/eng/eng_valid_segments_1.7.txt \
# -g /tmp/ctc_seg_en/output${TIME}/verified_segments/nv_test_segments.txt;
# L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# cd tools/ctc_segmentation && \
# TIME=`date +"%Y-%m-%d-%T"` && \
# /bin/bash run_segmentation.sh \
# --MODEL_NAME_OR_PATH=/home/TestData/ctc_segmentation/QuartzNet15x5-Ru-e512-wer14.45.nemo \
# --DATA_DIR=/home/TestData/ctc_segmentation/ru \
# --OUTPUT_DIR=/tmp/ctc_seg_ru/output${TIME} \
# --LANGUAGE=ru \
# --ADDITIONAL_SPLIT_SYMBOLS=";" && \
# python /home/TestData/ctc_segmentation/verify_alignment.py \
# -r /home/TestData/ctc_segmentation/ru/valid_ru_segments_1.7.txt \
# -g /tmp/ctc_seg_ru/output${TIME}/verified_segments/ru_segments.txt;
# # L2: G2P Models
# L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# cd examples/tts/g2p && \
# TIME=`date +"%Y-%m-%d-%T"` && OUTPUT_DIR_CONFORMER=output_ctc_${TIME} && \
# python g2p_train_and_evaluate.py \
# train_manifest=/home/TestData/g2p/g2p.json \
# validation_manifest=/home/TestData/g2p/g2p.json \
# model.test_ds.manifest_filepath=/home/TestData/g2p/g2p.json \
# model.tokenizer.dir=/home/TestData/g2p/tokenizer_spe_unigram_v512 \
# trainer.max_epochs=1 \
# model.max_source_len=64 \
# trainer.devices=1 \
# do_training=True \
# do_testing=True \
# exp_manager.exp_dir=${OUTPUT_DIR_CONFORMER} \
# +exp_manager.use_datetime_version=False\
# +exp_manager.version=test \
# --config-name=g2p_conformer_ctc && \
# python g2p_inference.py \
# pretrained_model=${OUTPUT_DIR_CONFORMER}/G2P-Conformer-CTC/test/checkpoints/G2P-Conformer-CTC.nemo \
# manifest_filepath=/home/TestData/g2p/g2p.json \
# phoneme_field=text
# # TODO: pleasefixme @redoctopus
# # - name: ByT5G2P training, evaluation and inference
# # run: |
# # cd examples/tts/g2p && \
# # TIME=`date +"%Y-%m-%d-%T"` && OUTPUT_DIR_T5=output_byt5_${TIME} && \
# # python g2p_train_and_evaluate.py \
# # train_manifest=/home/TestData/g2p/g2p.json \
# # validation_manifest=/home/TestData/g2p/g2p.json \
# # model.test_ds.manifest_filepath=/home/TestData/g2p/g2p.json \
# # trainer.max_epochs=1 \
# # model.max_source_len=64 \
# # trainer.devices=1 \
# # do_training=True \
# # do_testing=True \
# # exp_manager.exp_dir=${OUTPUT_DIR_T5} \
# # +exp_manager.use_datetime_version=False \
# # +exp_manager.version=test && \
# # python g2p_inference.py \
# # pretrained_model=${OUTPUT_DIR_T5}/T5G2P/test/checkpoints/T5G2P.nemo \
# # manifest_filepath=/home/TestData/g2p/g2p.json \
# # phoneme_field=text
# # - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
# # if: "failure()"
# L2_G2P_Models_HeteronymClassificationModel_training_evaluation_and_inference:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_G2P_Models_HeteronymClassificationModel_training_evaluation_and_inference') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# cd examples/tts/g2p && \
# TIME=`date +"%Y-%m-%d-%T"` && OUTPUT_DIR=output_${TIME} && \
# python g2p_heteronym_classification_train_and_evaluate.py \
# train_manifest=/home/TestData/g2p/manifest.json \
# validation_manifest=/home/TestData/g2p/manifest.json \
# test_manifest=/home/TestData/g2p/manifest.json \
# model.wordids=/home/TestData/g2p/wordids.tsv \
# trainer.max_epochs=1 \
# model.max_seq_length=64 \
# do_training=True \
# do_testing=True \
# exp_manager.exp_dir=${OUTPUT_DIR} \
# +exp_manager.use_datetime_version=False \
# +exp_manager.version=test && \
# python g2p_heteronym_classification_inference.py \
# manifest=/home/TestData/g2p/manifest.json \
# pretrained_model=${OUTPUT_DIR}/HeteronymClassification/test/checkpoints/HeteronymClassification.nemo \
# output_manifest=preds.json
# # L2: Parallel Pretraining BERT pretraining from Text/Preprocessed
# L2_Pretraining_BERT_pretraining_from_Text:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Pretraining_BERT_pretraining_from_Text') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# cd examples/nlp/language_modeling && \
# python bert_pretraining.py \
# --config-name=bert_pretraining_from_text_config.yaml \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# trainer.precision=16 \
# +trainer.fast_dev_run=true \
# model.train_ds.data_file=/home/TestData/nlp/wikitext-2/train.txt \
# model.train_ds.batch_size=32 \
# model.validation_ds.data_file=/home/TestData/nlp/wikitext-2/valid.txt \
# model.validation_ds.batch_size=32 \
# model.language_model.config_file=/home/TestData/nlp/bert_configs/bert_3200.json \
# model.optim.lr=0.01 \
# model.optim.sched.warmup_ratio=0.1 \
# model.tokenizer.tokenizer_name=sentencepiece \
# model.tokenizer.tokenizer_model=/home/TestData/nlp/wikitext-2/tokenizer_bpe_v3193/tokenizer.model \
# model.mask_prob=0.15 \
# model.short_seq_prob=0.1 \
# exp_manager.exp_dir=/tmp/PretrainingBERTFromText;
# # AFTER_SCRIPT: |
# # rm -f /home/TestData/nlp/wikitext-2/*.pkl
# #rm -rf examples/nlp/language_modeling/PretrainingBERTFromText
# L2_Pretraining_BERT_from_Preprocessed:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Pretraining_BERT_from_Preprocessed') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# cd examples/nlp/language_modeling && \
# python bert_pretraining.py \
# --config-name=bert_pretraining_from_preprocessed_config.yaml \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# trainer.precision=16 \
# +trainer.fast_dev_run=false \
# +trainer.max_epochs=1 \
# +trainer.limit_val_batches=0 \
# +trainer.limit_train_batches=1 \
# model.train_ds.data_file=/home/TestData/nlp/wiki_book_mini/training \
# model.train_ds.batch_size=8 \
# model.language_model.lm_checkpoint=/home/TestData/nlp/bert_ckpts/nemo1.0/bert_base_uncased_mlm_final_1074591_nemo1.0.pt \
# model.language_model.config_file=/home/TestData/nlp/bert_configs/uncased_L-12_H-768_A-12.json \
# model.optim.lr=0.875e-4 \
# model.optim.weight_decay=0.01 \
# model.optim.sched.warmup_ratio=0.01 \
# exp_manager.exp_dir=PretrainingBERTFromPreprocessed \
# exp_manager.create_checkpoint_callback=False
# #rm -rf examples/nlp/language_modeling/PretrainingBERTFromPreprocessed
# # TODO: remove +model.optim.capturable=True when Pytorch fix: https://github.com/pytorch/pytorch/pull/81858
# # is in the release container
# # L2: NMT Attention is All You Need Training
# L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Post-LN:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Post-LN') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/nlp/machine_translation/enc_dec_nmt.py \
# --config-path=conf \
# --config-name=aayn_base \
# do_testing=false \
# model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
# model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.encoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/spm_4k_ende.model \
# model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/spm_4k_ende.model \
# model.encoder.num_layers=1 \
# model.encoder.hidden_size=64 \
# model.encoder.inner_size=256 \
# model.decoder.num_layers=1 \
# model.decoder.hidden_size=64 \
# model.decoder.inner_size=256 \
# +model.optim.capturable=True \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.val_check_interval=2 \
# +trainer.limit_val_batches=1 \
# +trainer.max_steps=2 \
# trainer.precision=16 \
# +exp_manager.explicit_log_dir=examples/nlp/machine_translation/nmt_results \
# +exp_manager.create_checkpoint_callback=true
# python examples/nlp/machine_translation/enc_dec_nmt.py \
# --config-path=conf \
# --config-name=aayn_base \
# do_testing=true \
# model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
# model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.encoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/spm_4k_ende.model \
# model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/spm_4k_ende.model \
# model.encoder.num_layers=1 \
# model.encoder.hidden_size=64 \
# model.encoder.inner_size=256 \
# model.decoder.num_layers=1 \
# model.decoder.hidden_size=64 \
# model.decoder.inner_size=256 \
# +model.optim.capturable=True \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.val_check_interval=10 \
# +trainer.limit_val_batches=1 \
# +trainer.limit_test_batches=1 \
# +trainer.max_steps=10 \
# +exp_manager.explicit_log_dir=examples/nlp/machine_translation/nmt_results \
# +exp_manager.create_checkpoint_callback=true \
# +exp_manager.resume_if_exists=True
# AFTER_SCRIPT: |
# rm -rf examples/nlp/machine_translation/nmt_results
# L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Pre-LN:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Pre-LN') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# cd examples/nlp/machine_translation && \
# python enc_dec_nmt.py \
# --config-path=conf \
# --config-name=aayn_base \
# do_testing=true \
# model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
# model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.encoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/spm_4k_ende.model \
# model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/spm_4k_ende.model \
# model.encoder.pre_ln=true \
# model.decoder.pre_ln=true \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=true \
# +trainer.limit_test_batches=2 \
# exp_manager=null
# L2_NMT_Attention_is_All_You_Need_Training_NMT_Multi-Validation:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Training_NMT_Multi-Validation') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# cd examples/nlp/machine_translation && \
# python enc_dec_nmt.py \
# --config-path=conf \
# --config-name=aayn_base \
# do_testing=true \
# model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-en-de.src \
# model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-en-de.ref \
# model.validation_ds.src_file_name=[/home/TestData/nlp/nmt/toy_data/wmt13-en-de.src,/home/TestData/nlp/nmt/toy_data/wmt14-en-de.src] \
# model.validation_ds.tgt_file_name=[/home/TestData/nlp/nmt/toy_data/wmt13-en-de.ref,/home/TestData/nlp/nmt/toy_data/wmt14-en-de.ref] \
# model.test_ds.src_file_name=[/home/TestData/nlp/nmt/toy_data/wmt13-en-de.src,/home/TestData/nlp/nmt/toy_data/wmt14-en-de.src] \
# model.test_ds.tgt_file_name=[/home/TestData/nlp/nmt/toy_data/wmt13-en-de.ref,/home/TestData/nlp/nmt/toy_data/wmt14-en-de.ref] \
# model.encoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/spm_4k_ende.model \
# model.decoder_tokenizer.tokenizer_model=/home/TestData/nlp/nmt/toy_data/spm_4k_ende.model \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=true \
# +trainer.limit_test_batches=2 \
# exp_manager=null
# # L2: NMT Attention is All You Need Inference
# L2_NMT_Attention_is_All_You_Need_Inference:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Inference') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# cd examples/nlp/machine_translation && \
# python nmt_transformer_infer.py \
# --model=/home/TestData/nlp/nmt/toy_data/enes_v16k_s100k_6x6.nemo \
# --srctext=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.test.src \
# --tgtout=/home/TestData/nlp/nmt/toy_data/out.txt \
# --target_lang en \
# --source_lang de
# # L2: NMT Attention is All You Need Finetuning
# L2_NMT_Attention_is_All_You_Need_Finetuning:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Finetuning') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# cd examples/nlp/machine_translation && \
# python enc_dec_nmt_finetune.py \
# model_path=/home/TestData/nlp/nmt/toy_data/enes_v16k_s100k_6x6.nemo \
# trainer.devices=1 \
# ~trainer.max_epochs \
# model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
# model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.test_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.test_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# +trainer.val_check_interval=10 \
# +trainer.limit_val_batches=1 \
# +trainer.limit_test_batches=1 \
# +trainer.max_steps=10 \
# +exp_manager.exp_dir=examples/nlp/machine_translation/nmt_finetune \
# +exp_manager.create_checkpoint_callback=True \
# +exp_manager.checkpoint_callback_params.monitor=val_sacreBLEU \
# +exp_manager.checkpoint_callback_params.mode=max \
# +exp_manager.checkpoint_callback_params.save_best_model=true
# AFTER_SCRIPT: |
# rm -rf examples/nlp/machine_translation/nmt_finetune
# # L2: NMT Tarred Dataset Creation
# L2_NMT_Tarred_Dataset_Creation_Auto_Tarred_Dataset_Creation:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NMT_Tarred_Dataset_Creation_Auto_Tarred_Dataset_Creation') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# cd examples/nlp/machine_translation && \
# python enc_dec_nmt.py \
# --config-path=conf \
# --config-name=aayn_base \
# do_training=false \
# model.preproc_out_dir=$PWD/preproc_out_dir \
# model.train_ds.use_tarred_dataset=true \
# model.train_ds.n_preproc_jobs=2 \
# model.train_ds.lines_per_dataset_fragment=500 \
# model.train_ds.num_batches_per_tarfile=10 \
# model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
# model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.encoder_tokenizer.vocab_size=2000 \
# model.decoder_tokenizer.vocab_size=2000 \
# ~model.test_ds \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.fast_dev_run=true \
# exp_manager=null
# L2_NMT_Tarred_Dataset_Creation_Script_Tarred_Dataset_Creation:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NMT_Tarred_Dataset_Creation_Script_Tarred_Dataset_Creation') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# cd examples/nlp/machine_translation && \
# python create_tarred_parallel_dataset.py \
# --src_fname /home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# --tgt_fname /home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
# --out_dir $PWD/out_dir \
# --encoder_tokenizer_vocab_size=2000 \
# --decoder_tokenizer_vocab_size=2000 \
# --tokens_in_batch=1000 \
# --lines_per_dataset_fragment=500 \
# --num_batches_per_tarfile=10 \
# --n_preproc_jobs=2
# L2_Megatron_NMT_Training_TP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_NMT_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/machine_translation/megatron_nmt_training.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=10 \
# +trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=10 \
# trainer.precision=16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/machine_translation/megatron_nmt_results \
# model.tensor_model_parallel_size=2 \
# model.seq_length=128 \
# model.encoder.num_layers=4 \
# model.encoder.hidden_size=64 \
# model.encoder.num_attention_heads=8 \
# model.encoder.activation="swiglu" \
# model.encoder.masked_softmax_fusion=False \
# model.encoder.bias_activation_fusion=False \
# model.encoder.activations_checkpoint_method="block" \
# model.encoder.activations_checkpoint_num_layers=1 \
# model.decoder.num_layers=2 \
# model.decoder.hidden_size=64 \
# model.decoder.num_attention_heads=8 \
# model.decoder.activation="swiglu" \
# model.decoder.masked_softmax_fusion=False \
# model.decoder.bias_activation_fusion=False \
# model.decoder.activations_checkpoint_method="block" \
# model.decoder.activations_checkpoint_num_layers=1 \
# model.micro_batch_size=2 \
# model.global_batch_size=4 \
# model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
# model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
# model.train_ds.num_workers=1 \
# model.validation_ds.num_workers=1 \
# ~model.test_ds \
# model.train_ds.dataset_type=text_memmap \
# model.encoder_tokenizer.library=sentencepiece \
# model.encoder_tokenizer.model=/home/TestData/nlp/nmt/toy_data/spm_64k_all_langs_plus_en.model \
# model.decoder_tokenizer.library=sentencepiece \
# model.decoder_tokenizer.model=/home/TestData/nlp/nmt/toy_data/spm_64k_all_langs_plus_en.model
# # Change val_check_interval to 1 for the resume run: len(dataloader) is 1 because max_steps matches the training run,
# # and Lightning 2.0 raises an error at the beginning of fit_loop.run() if val_check_interval > len(dataloader):
# # https://github.com/Lightning-AI/lightning/blob/2.0.6/src/lightning/pytorch/loops/fit_loop.py#L259
# python examples/nlp/machine_translation/megatron_nmt_training.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=1 \
# +trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=10 \
# trainer.precision=16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/machine_translation/megatron_nmt_results \
# model.tensor_model_parallel_size=2 \
# model.seq_length=128 \
# model.encoder.num_layers=4 \
# model.encoder.hidden_size=64 \
# model.encoder.num_attention_heads=8 \
# model.encoder.activation="swiglu" \
# model.encoder.masked_softmax_fusion=False \
# model.encoder.bias_activation_fusion=False \
# model.encoder.activations_checkpoint_method="block" \
# model.encoder.activations_checkpoint_num_layers=1 \
# model.decoder.num_layers=2 \
# model.decoder.hidden_size=64 \
# model.decoder.num_attention_heads=8 \
# model.decoder.activation="swiglu" \
# model.decoder.masked_softmax_fusion=False \
# model.decoder.bias_activation_fusion=False \
# model.decoder.activations_checkpoint_method="block" \
# model.decoder.activations_checkpoint_num_layers=1 \
# model.micro_batch_size=2 \
# model.global_batch_size=4 \
# model.train_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.train_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
# model.validation_ds.src_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src \
# model.validation_ds.tgt_file_name=/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref \
# model.train_ds.num_workers=1 \
# model.validation_ds.num_workers=1 \
# ~model.test_ds \
# model.train_ds.dataset_type=text_memmap \
# model.encoder_tokenizer.library=sentencepiece \
# model.encoder_tokenizer.model=/home/TestData/nlp/nmt/toy_data/spm_64k_all_langs_plus_en.model \
# model.decoder_tokenizer.library=sentencepiece \
# model.decoder_tokenizer.model=/home/TestData/nlp/nmt/toy_data/spm_64k_all_langs_plus_en.model
# AFTER_SCRIPT: |
# rm -rf examples/nlp/machine_translation/megatron_nmt_results
# L2_Megatron_Bert_Pretraining_and_Resume_Training_with_Pipeline_Parallelism:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_Bert_Pretraining_and_Resume_Training_with_Pipeline_Parallelism') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_bert_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=10 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=10 \
# trainer.precision=bf16 \
# model.megatron_amp_O2=True \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
# model.tensor_model_parallel_size=2 \
# model.optim.name=fused_adam \
# model.optim.lr=2e-4 \
# model.optim.sched.warmup_steps=2 \
# model.optim.sched.constant_steps=2 \
# model.optim.sched.min_lr=8e-5 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.data.seq_length=128 \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
# model.num_layers=8 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.activations_checkpoint_method=block \
# model.activations_checkpoint_num_layers=1 \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
# python examples/nlp/language_modeling/megatron_bert_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=10 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=20 \
# trainer.precision=bf16 \
# model.megatron_amp_O2=True \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
# exp_manager.resume_if_exists=True \
# model.tensor_model_parallel_size=2 \
# model.optim.name=fused_adam \
# model.optim.lr=2e-4 \
# model.optim.sched.warmup_steps=2 \
# model.optim.sched.constant_steps=2 \
# model.optim.sched.min_lr=8e-5 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.data.seq_length=128 \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
# model.num_layers=8 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.activations_checkpoint_method=block \
# model.activations_checkpoint_num_layers=1 \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
# L2_Megatron_Core_Bert_Pretraining_and_Resume_Training:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_Core_Bert_Pretraining_and_Resume_Training') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_bert_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=10 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=10 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
# model.mcore_bert=True \
# model.tensor_model_parallel_size=2 \
# model.optim.name=fused_adam \
# model.optim.lr=2e-4 \
# model.sequence_parallel=True \
# model.optim.sched.warmup_steps=2 \
# model.optim.sched.constant_steps=2 \
# model.optim.sched.min_lr=8e-5 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.data.seq_length=128 \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
# model.num_layers=8 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.activations_checkpoint_method="block" \
# model.activations_checkpoint_num_layers=1 \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
# python examples/nlp/language_modeling/megatron_bert_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=10 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=20 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/bert_pretrain_results \
# exp_manager.resume_if_exists=True \
# model.mcore_bert=True \
# model.tensor_model_parallel_size=2 \
# model.optim.name=fused_adam \
# model.optim.lr=2e-4 \
# model.optim.sched.warmup_steps=2 \
# model.optim.sched.constant_steps=2 \
# model.optim.sched.min_lr=8e-5 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.data.seq_length=128 \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_bert/data/bert/vocab.txt \
# model.num_layers=8 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.activations_checkpoint_method="block" \
# model.activations_checkpoint_num_layers=1 \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence,.5,/home/TestData/nlp/megatron_bert/data/bert/simple_wiki_bert_preproc_text_sentence] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/bert_index_mappings
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/bert_pretrain_results
# rm -rf examples/nlp/language_modeling/bert_index_mappings
# L2_RAG_Pipeline_Indexing:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_RAG_Pipeline_Indexing') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/rag/rag_indexing.py \
# trainer.num_nodes=1 \
# trainer.devices=1 \
# trainer.precision="bf16-mixed" \
# indexing.embedder.model_path="/home/TestData/nlp/rag_pipeline/testing_models/embedders/sbert_nemo.nemo" \
# indexing.embedder.embed_batch_size=128 \
# indexing.data.data_path="/home/TestData/nlp/rag_pipeline/testing_data/corpus_data/sample_data" \
# indexing.data.chunk_size=256 \
# indexing.data.chunk_overlap=10 \
# indexing.index_path="/home/TestData/nlp/rag_pipeline/testing_data/saved_index/sample_index"
# L2_RAG_Pipeline_Generating:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_RAG_Pipeline_Generating') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/rag/rag_generating.py \
# trainer.devices=1 \
# trainer.precision="bf16-mixed" \
# indexing.embedder.model_path="/home/TestData/nlp/rag_pipeline/testing_models/embedders/sbert_nemo.nemo" \
# indexing.index_path="/home/TestData/nlp/rag_pipeline/testing_data/saved_index/sample_index" \
# generating.llm.model_path="/home/TestData/nlp/rag_pipeline/testing_models/llms/megatron_gpt_125m.nemo" \
# generating.inference.tokens_to_generate=50 \
# generating.inference.greedy=False \
# generating.inference.temperature=1.0 \
# generating.query="Which art schools did I apply to?"
# L2_Megatron_GPT_Pretraining_and_Resume_Training_TP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_Pretraining_and_Resume_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-2-h100
# SCRIPT: |
# # This is to improve p2p overlap on H100
# export NVTE_FWD_LAYERNORM_SM_MARGIN=8
# export NVTE_BWD_LAYERNORM_SM_MARGIN=8
# export TORCH_NCCL_AVOID_RECORD_STREAMS=1
# export NCCL_MIN_NCHANNELS=4
# # TP overlap is not supported in a Docker environment
# #NVTE_UB_SPLIT_RS: 0
# #NVTE_UB_ATOMIC_GEMM_RS: 1
# #NVTE_RS_STRIDED_ATOMIC: 1
# #NVTE_UB_FP8_RS: 1
# # Increase p2p chunksize to 2MB
# export NCCL_P2P_NET_CHUNKSIZE=2097152
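# # (2 MiB = 2 * 1024 * 1024 bytes = 2097152)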
# # Disable gc when switching to/from validation steps
# export NEMO_MANUAL_GC_IN_VALIDATION=0
# python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=2 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=3 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=/tmp/examples_gpt_pretrain_results_te_autocast \
# ++model.transformer_engine=True \
# ++model.fp8=True \
# ++model.fp8_hybrid=True \
# ++model.fp8_amax_history_len=1024 \
# ++model.fp8_amax_compute_algo=max \
# ++model.reduce_amax=True \
# ++model.use_te_rng_tracker=True \
# ++model.name=megatron_gpt_full_te_layer_autocast \
# model.ub_tp_comm_overlap=False \
# model.tensor_model_parallel_size=2 \
# model.optim.name=distributed_fused_adam \
# model.optim.lr=2e-4 \
# model.optim.sched.warmup_steps=1 \
# model.optim.sched.constant_steps=1 \
# model.optim.sched.min_lr=8e-5 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.data.seq_length=128 \
# model.bias=False \
# model.bias_activation_fusion=False \
# model.bias_dropout_add_fusion=False \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# model.num_layers=2 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.activations_checkpoint_method=block \
# model.activations_checkpoint_granularity=full \
# model.activations_checkpoint_num_layers=1 \
# model.data.validation_drop_last=False \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# model.data.index_mapping_dir=/tmp/examples_gpt_pretrain_results_te_autocast/gpt_mapping
# python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=2 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=6 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=/tmp/examples_gpt_pretrain_results_te_autocast \
# exp_manager.resume_if_exists=True \
# ++model.transformer_engine=True \
# ++model.fp8=True \
# ++model.fp8_hybrid=True \
# ++model.fp8_amax_history_len=1024 \
# ++model.fp8_amax_compute_algo=max \
# ++model.reduce_amax=True \
# ++model.use_te_rng_tracker=True \
# ++model.name=megatron_gpt_full_te_layer_autocast \
# model.ub_tp_comm_overlap=False \
# model.tensor_model_parallel_size=2 \
# model.optim.name=distributed_fused_adam \
# model.optim.lr=2e-4 \
# model.optim.sched.warmup_steps=2 \
# model.optim.sched.constant_steps=2 \
# model.optim.sched.min_lr=8e-5 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.data.seq_length=128 \
# model.bias=False \
# model.bias_activation_fusion=False \
# model.bias_dropout_add_fusion=False \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# model.num_layers=2 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.activations_checkpoint_method=block \
# model.activations_checkpoint_granularity=full \
# model.activations_checkpoint_num_layers=1 \
# model.data.validation_drop_last=False \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# model.data.index_mapping_dir=/tmp/examples_gpt_pretrain_results_te_autocast/gpt_mapping
# L2_Megatron_GPT_Skip_Train:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_Skip_Train') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# model.skip_train=True \
# model.tensor_model_parallel_size=2 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.data.seq_length=128 \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# model.num_layers=8 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.data.data_prefix=[] \
# model.data.data_impl=mock \
# model.dist_ckpt_format=torch_dist
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/gpt_pretrain_results
# rm -rf examples/nlp/language_modeling/gpt_index_mappings
# L2_Megatron_GPT_with_Rope_Pretraining_and_Resume_Training_TP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_with_Rope_Pretraining_and_Resume_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=2 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=3 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# model.tensor_model_parallel_size=2 \
# model.optim.name=fused_adam \
# model.optim.lr=2e-4 \
# model.optim.sched.warmup_steps=1 \
# model.optim.sched.constant_steps=1 \
# model.optim.sched.min_lr=8e-5 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.data.seq_length=128 \
# model.position_embedding_type=rope \
# model.rotary_percentage=0.5 \
# model.bias=False \
# model.bias_activation_fusion=False \
# model.bias_dropout_add_fusion=False \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# model.num_layers=8 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.activations_checkpoint_method=block \
# model.activations_checkpoint_granularity=full \
# model.activations_checkpoint_num_layers=1 \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
# # commented out to save time on github ci @adithyare
# # python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# # trainer.devices=2 \
# # trainer.accelerator=gpu \
# # trainer.log_every_n_steps=1 \
# # trainer.val_check_interval=2 \
# # trainer.limit_val_batches=1 \
# # trainer.accumulate_grad_batches=1 \
# # trainer.max_steps=6 \
# # trainer.gradient_clip_val=1.0 \
# # exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# # exp_manager.resume_if_exists=True \
# # model.tensor_model_parallel_size=2 \
# # model.optim.name=fused_adam \
# # model.optim.lr=2e-4 \
# # model.optim.sched.warmup_steps=2 \
# # model.optim.sched.constant_steps=2 \
# # model.optim.sched.min_lr=8e-5 \
# # model.max_position_embeddings=128 \
# # model.encoder_seq_length=128 \
# # model.data.seq_length=128 \
# # model.position_embedding_type=rope \
# # model.rotary_percentage=0.5 \
# # model.normalization=rmsnorm \
# # model.bias=False \
# # model.bias_activation_fusion=False \
# # model.bias_dropout_add_fusion=False \
# # model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# # model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# # model.num_layers=8 \
# # model.hidden_size=256 \
# # model.num_attention_heads=8 \
# # model.activations_checkpoint_method=block \
# # model.activations_checkpoint_granularity=full \
# # model.activations_checkpoint_num_layers=1 \
# # model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# # model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/gpt_pretrain_results
# rm -rf examples/nlp/language_modeling/gpt_index_mappings
# # This test requires Ampere but some of the test GPUs are Volta
# # Need to add a check for compute capability before uncommenting this test
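# # A minimal sketch of such a gate (hypothetical, not part of this workflow): skip the job unless the first
# # visible GPU reports compute capability >= 8.0 (Ampere or newer), e.g.
# #   python -c "import sys, torch; sys.exit(0 if torch.cuda.get_device_capability(0)[0] >= 8 else 1)" \
# #     || { echo "Pre-Ampere GPU detected, skipping test"; exit 0; }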
# # - name: L2: Megatron GPT with Rope Pretraining using Flash Attention and Resume Training TP=2
# # - run: |
# # python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# # trainer.devices=2 \
# # trainer.accelerator=gpu \
# # trainer.log_every_n_steps=1 \
# # trainer.val_check_interval=2 \
# # trainer.limit_val_batches=2 \
# # trainer.accumulate_grad_batches=1 \
# # trainer.max_steps=3 \
# # trainer.precision=16 \
# # trainer.gradient_clip_val=1.0 \
# # exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# # model.tensor_model_parallel_size=2 \
# # model.optim.name=fused_adam \
# # model.optim.lr=2e-4 \
# # model.optim.sched.warmup_steps=1 \
# # model.optim.sched.constant_steps=1 \
# # model.optim.sched.min_lr=8e-5 \
# # model.max_position_embeddings=128 \
# # model.encoder_seq_length=128 \
# # model.data.seq_length=128 \
# # model.position_embedding_type=rope \
# # model.rotary_percentage=0.5 \
# # model.normalization=rmsnorm \
# # model.bias=False \
# # model.bias_activation_fusion=False \
# # model.bias_dropout_add_fusion=False \
# # model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# # model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# # model.num_layers=8 \
# # model.hidden_size=256 \
# # model.num_attention_heads=8 \
# # model.activations_checkpoint_method=block \
# # model.activations_checkpoint_granularity=full \
# # model.activations_checkpoint_num_layers=1 \
# # model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# # model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings \
# # model.use_flash_attention=True
# # # commented out to save time on github ci @adithyare
# # # python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# # # trainer.devices=2 \
# # # trainer.accelerator=gpu \
# # # trainer.log_every_n_steps=1 \
# # # trainer.val_check_interval=2 \
# # # trainer.limit_val_batches=1 \
# # # trainer.accumulate_grad_batches=1 \
# # # trainer.max_steps=6 \
# # # trainer.precision=16 \
# # # trainer.gradient_clip_val=1.0 \
# # # exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# # # exp_manager.resume_if_exists=True \
# # # model.tensor_model_parallel_size=2 \
# # # model.optim.name=fused_adam \
# # # model.optim.lr=2e-4 \
# # # model.optim.sched.warmup_steps=2 \
# # # model.optim.sched.constant_steps=2 \
# # # model.optim.sched.min_lr=8e-5 \
# # # model.max_position_embeddings=128 \
# # # model.encoder_seq_length=128 \
# # # model.data.seq_length=128 \
# # # model.position_embedding_type=rope \
# # # model.rotary_percentage=0.5 \
# # # model.normalization=rmsnorm \
# # # model.bias=False \
# # # model.bias_activation_fusion=False \
# # # model.bias_dropout_add_fusion=False \
# # # model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# # # model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# # # model.num_layers=8 \
# # # model.hidden_size=256 \
# # # model.num_attention_heads=8 \
# # # model.activations_checkpoint_method=block \
# # # model.activations_checkpoint_granularity=full \
# # # model.activations_checkpoint_num_layers=1 \
# # # model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# # # model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings \
# # # model.use_flash_attention=True
# # rm -rf examples/nlp/language_modeling/gpt_pretrain_results
# # rm -rf examples/nlp/language_modeling/gpt_index_mappings
# L2_Megatron_LM_To_NeMo_Conversion:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_LM_To_NeMo_Conversion') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# CUDA_DEVICE_MAX_CONNECTIONS=1 torchrun --nproc_per_node=1 Megatron-LM/pretrain_gpt.py \
# --mock-data \
# --distributed-timeout-minutes 60 \
# --use-mcore-models \
# --no-mmap-bin-files \
# --untie-embeddings-and-output-weights \
# --disable-bias-linear \
# --train-samples 80 \
# --init-method-std 0.014 \
# --position-embedding-type rope \
# --rotary-base 1000000 \
# --rotary-percent 1.0 \
# --squared-relu \
# --num-layers 4 \
# --hidden-size 384 \
# --num-attention-heads 8 \
# --group-query-attention \
# --num-query-groups 8 \
# --ffn-hidden-size 1536 \
# --kv-channels 128 \
# --normalization RMSNorm \
# --attention-dropout 0.0 \
# --hidden-dropout 0.0 \
# --exit-duration-in-mins 5750 \
# --tensor-model-parallel-size 1 \
# --pipeline-model-parallel-size 1 \
# --seq-length 8192 \
# --max-position-embeddings 8192 \
# --micro-batch-size 1 \
# --global-batch-size 8 \
# --lr 6e-4 \
# --min-lr 6e-6 \
# --weight-decay 0.1 \
# --clip-grad 1.0 \
# --lr-decay-style cosine \
# --log-interval 1 \
# --eval-iters 1 \
# --eval-interval 10 \
# --tokenizer-type GPT2BPETokenizer \
# --tokenizer-model /home/TestData/nlp/gpt2_tokenizer \
# --vocab-file /home/TestData/nlp/gpt2_tokenizer/vocab.json \
# --merge-file /home/TestData/nlp/gpt2_tokenizer/merges.txt \
# --save /tmp/mlm_conversion_ckpt \
# --save-interval 10 \
# --ckpt-format torch_dist \
# --ckpt-fully-parallel-save \
# --ckpt-fully-parallel-load \
# --async-save \
# --ckpt-assume-constant-structure \
# --timing-log-option minmax \
# --log-params-norm \
# --log-num-zeros-in-grad \
# --log-throughput \
# --bf16 \
# --adam-beta1 0.9 \
# --adam-beta2 0.95 \
# --use-distributed-optimizer \
# --overlap-grad-reduce \
# --overlap-param-gather \
# --manual-gc \
# --num-workers 2
# python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# model.data.data_impl=mock \
# model.data.data_prefix=[] \
# model.skip_train=True \
# model.transformer_engine=True \
# model.use_flash_attention=False \
# model.normalization=rmsnorm \
# model.num_layers=4 \
# model.hidden_size=384 \
# model.ffn_hidden_size=1536 \
# model.num_attention_heads=8 \
# model.num_query_groups=8 \
# model.bias=False \
# model.bias_activation_fusion=False \
# model.bias_dropout_add_fusion=True \
# model.masked_softmax_fusion=True \
# model.encoder_seq_length=8192 \
# model.max_position_embeddings=8192 \
# model.data.seq_length=8192 \
# model.activation=squared-relu \
# model.transformer_block_type=True \
# model.micro_batch_size=1 \
# model.global_batch_size=8 \
# ++model.rotary_base=1000000 \
# model.rotary_percentage=1.0 \
# model.apply_query_key_layer_scaling=False \
# ++model.group_query_attention=True \
# model.apply_rope_fusion=True \
# model.kv_channels=128 \
# ++model.bert_binary_head=True \
# ++model.position_embedding_type=rope \
# ++model.add_position_embedding=True \
# trainer.limit_val_batches=1 \
# exp_manager.exp_dir=/tmp/nemo_conversion_ckpt
# python -m torch.distributed.launch --nproc_per_node=1 examples/nlp/language_modeling/megatron_ckpt_to_nemo.py \
# --checkpoint_folder /tmp/mlm_conversion_ckpt \
# --checkpoint_name iter_0000010 \
# --nemo_file_path /tmp/mlm_to_nemo_test.nemo \
# --tensor_model_parallel_size 1 \
# --pipeline_model_parallel_size 1 \
# --gpus_per_node 1 \
# --model_type gpt \
# --hparams_file /tmp/nemo_conversion_ckpt/megatron_gpt/version_0/hparams.yaml \
# --convert_mlm
# L2_Megatron_GPT_with_ResetLR_Pretraining_and_Resume_Training_TP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_with_ResetLR_Pretraining_and_Resume_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=3 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=3 \
# trainer.precision=bf16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# model.tensor_model_parallel_size=2 \
# model.megatron_amp_O2=True \
# model.optim.name=distributed_fused_adam \
# model.optim.lr=2e-4 \
# model.optim.sched.warmup_steps=2 \
# model.optim.sched.constant_steps=2 \
# model.optim.sched.min_lr=8e-5 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.data.seq_length=128 \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# model.num_layers=8 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
# python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=3 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=6 \
# trainer.precision=bf16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# exp_manager.resume_if_exists=True \
# model.reset_lr=True \
# model.tensor_model_parallel_size=2 \
# model.megatron_amp_O2=True \
# model.optim.name=distributed_fused_adam \
# model.optim.lr=2e-4 \
# model.optim.sched.warmup_steps=2 \
# model.optim.sched.constant_steps=2 \
# model.optim.sched.min_lr=8e-5 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.data.seq_length=128 \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# model.num_layers=8 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/gpt_pretrain_results
# rm -rf examples/nlp/language_modeling/gpt_index_mappings
# L2_Megatron_GPT_with_Drop_Optimizer_States_TP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_with_Drop_Optimizer_States_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=2 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=6 \
# trainer.precision=bf16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# exp_manager.checkpoint_callback_params.save_last_n_optim_states=1 \
# model.dist_ckpt_format="torch_dist" \
# model.tensor_model_parallel_size=2 \
# model.megatron_amp_O2=True \
# model.optim.name=distributed_fused_adam \
# model.optim.lr=2e-4 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.data.seq_length=128 \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# model.num_layers=8 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/gpt_pretrain_results
# rm -rf examples/nlp/language_modeling/gpt_index_mappings
# L2_Megatron_GPT_with_ALiBi_Pretraining_and_Resume_Training_TP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_with_ALiBi_Pretraining_and_Resume_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=2 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=3 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# model.tensor_model_parallel_size=2 \
# model.optim.name=fused_adam \
# model.optim.lr=2e-4 \
# model.optim.sched.warmup_steps=1 \
# model.optim.sched.constant_steps=1 \
# model.optim.sched.min_lr=8e-5 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.data.seq_length=128 \
# model.position_embedding_type=alibi \
# model.bias=False \
# model.bias_activation_fusion=False \
# model.bias_dropout_add_fusion=False \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# model.num_layers=8 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.activations_checkpoint_method=block \
# model.activations_checkpoint_granularity=full \
# model.activations_checkpoint_num_layers=1 \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
# # not testing resume functionality to save time on ci @adithyare
# #python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# #trainer.devices=2 \
# #trainer.accelerator=gpu \
# #trainer.log_every_n_steps=1 \
# #trainer.val_check_interval=2 \
# #trainer.limit_val_batches=1 \
# #trainer.accumulate_grad_batches=1 \
# #trainer.max_steps=6 \
# #trainer.gradient_clip_val=1.0 \
# #exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# #exp_manager.resume_if_exists=True \
# #model.tensor_model_parallel_size=2 \
# #model.optim.name=fused_adam \
# #model.optim.lr=2e-4 \
# #model.optim.sched.warmup_steps=2 \
# #model.optim.sched.constant_steps=2 \
# #model.optim.sched.min_lr=8e-5 \
# #model.max_position_embeddings=128 \
# #model.encoder_seq_length=128 \
# #model.data.seq_length=128 \
# #model.position_embedding_type=alibi \
# #model.normalization=rmsnorm \
# #model.bias=False \
# #model.bias_activation_fusion=False \
# #model.bias_dropout_add_fusion=False \
# #model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# #model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# #model.num_layers=8 \
# #model.hidden_size=256 \
# #model.num_attention_heads=8 \
# #model.activations_checkpoint_method=block \
# #model.activations_checkpoint_granularity=full \
# #model.activations_checkpoint_num_layers=1 \
# #model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# #model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/gpt_pretrain_results
# rm -rf examples/nlp/language_modeling/gpt_index_mappings
# L2_Megatron_GPT_with_KERPLE_Pretraining_and_Resume_Training_TP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_with_KERPLE_Pretraining_and_Resume_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=2 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=3 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# model.tensor_model_parallel_size=2 \
# model.optim.name=fused_adam \
# model.optim.lr=2e-4 \
# model.optim.sched.warmup_steps=1 \
# model.optim.sched.constant_steps=1 \
# model.optim.sched.min_lr=8e-5 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.data.seq_length=128 \
# model.position_embedding_type=kerple \
# model.bias=False \
# model.bias_activation_fusion=False \
# model.bias_dropout_add_fusion=False \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# model.num_layers=8 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.activations_checkpoint_method=block \
# model.activations_checkpoint_granularity=full \
# model.activations_checkpoint_num_layers=1 \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
# # commented out to save time on github ci @adithyare
# #python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# #trainer.devices=2 \
# #trainer.accelerator=gpu \
# #trainer.log_every_n_steps=1 \
# #trainer.val_check_interval=2 \
# #trainer.limit_val_batches=1 \
# #trainer.accumulate_grad_batches=1 \
# #trainer.max_steps=6 \
# #trainer.precision=16 \
# #trainer.gradient_clip_val=1.0 \
# #exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# #exp_manager.resume_if_exists=True \
# #model.tensor_model_parallel_size=2 \
# #model.optim.name=fused_adam \
# #model.optim.lr=2e-4 \
# #model.optim.sched.warmup_steps=2 \
# #model.optim.sched.constant_steps=2 \
# #model.optim.sched.min_lr=8e-5 \
# #model.max_position_embeddings=128 \
# #model.encoder_seq_length=128 \
# #model.data.seq_length=128 \
# #model.position_embedding_type=kerple \
# #model.normalization=rmsnorm \
# #model.bias=False \
# #model.bias_activation_fusion=False \
# #model.bias_dropout_add_fusion=False \
# #model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# #model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# #model.num_layers=8 \
# #model.hidden_size=256 \
# #model.num_attention_heads=8 \
# #model.activations_checkpoint_method=block \
# #model.activations_checkpoint_granularity=full \
# #model.activations_checkpoint_num_layers=1 \
# #model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# #model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/gpt_pretrain_results
# rm -rf examples/nlp/language_modeling/gpt_index_mappings
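# # Optional: Megatron Core GPT pretraining and resume with PP=2 and FP8 via Transformer Engine (H100 runner)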
# OPTIONAL_L2_Megatron_GPT_Pretraining_and_Resume_Training_PP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L2_Megatron_GPT_Pretraining_and_Resume_Training_PP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-2-h100
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=2 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=3 \
# trainer.precision=bf16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# ++model.transformer_engine=True \
# ++model.fp8=True \
# ++model.fp8_hybrid=True \
# ++model.fp8_amax_history_len=1024 \
# ++model.fp8_amax_compute_algo=max \
# ++model.reduce_amax=True \
# ++model.use_te_rng_tracker=True \
# ++model.name=megatron_gpt_full_te_layer_autocast \
# model.ub_tp_comm_overlap=False \
# model.pipeline_model_parallel_size=2 \
# model.tensor_model_parallel_size=1 \
# model.mcore_gpt=True \
# model.megatron_amp_O2=True \
# model.optim.name=distributed_fused_adam \
# model.optim.lr=2e-4 \
# model.optim.sched.warmup_steps=1 \
# model.optim.sched.constant_steps=1 \
# model.optim.sched.min_lr=8e-5 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.activation=fast-swiglu \
# model.bias_activation_fusion=False \
# model.hidden_dropout=0.0 \
# model.attention_dropout=0.0 \
# model.transformer_block_type=normformer \
# model.headscale=True \
# model.data.seq_length=128 \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# model.num_layers=8 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.activations_checkpoint_method=block \
# model.activations_checkpoint_granularity=full \
# model.activations_checkpoint_num_layers=1 \
# model.data.validation_drop_last=False \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
# python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=2 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=6 \
# trainer.precision=bf16 \
# trainer.gradient_clip_val=1.0 \
# model.mcore_gpt=True \
# model.megatron_amp_O2=True \
# exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# exp_manager.resume_if_exists=True \
# ++model.transformer_engine=True \
# ++model.fp8=True \
# ++model.fp8_hybrid=True \
# ++model.fp8_amax_history_len=1024 \
# ++model.fp8_amax_compute_algo=max \
# ++model.reduce_amax=True \
# ++model.use_te_rng_tracker=True \
# ++model.name=megatron_gpt_full_te_layer_autocast \
# model.ub_tp_comm_overlap=False \
# model.pipeline_model_parallel_size=2 \
# model.tensor_model_parallel_size=1 \
# model.optim.name=distributed_fused_adam \
# model.optim.lr=2e-4 \
# model.optim.sched.warmup_steps=2 \
# model.optim.sched.constant_steps=2 \
# model.optim.sched.min_lr=8e-5 \
# model.max_position_embeddings=128 \
# model.encoder_seq_length=128 \
# model.activation=fast-swiglu \
# model.bias_activation_fusion=False \
# model.hidden_dropout=0.0 \
# model.attention_dropout=0.0 \
# model.transformer_block_type=normformer \
# model.headscale=True \
# model.data.seq_length=128 \
# model.tokenizer.vocab_file=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# model.tokenizer.merge_file=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# model.num_layers=8 \
# model.hidden_size=256 \
# model.num_attention_heads=8 \
# model.activations_checkpoint_method=block \
# model.activations_checkpoint_granularity=full \
# model.activations_checkpoint_num_layers=1 \
# model.data.validation_drop_last=False \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/gpt_pretrain_results
# rm -rf examples/nlp/language_modeling/gpt_index_mappings
# IS_OPTIONAL: true
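# # L2: Megatron GPT Auto Configurator - runs three candidate configurations (run_number 1-3), then collects results with --get_results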
# L2_Megatron_GPT_Auto_Configurator_TP1_PP1_MBS124:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_Auto_Configurator_TP1_PP1_MBS124') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# mkdir examples/llm/auto_configurator/auto_conf_logs
# python examples/llm/auto_configurator/auto_config.py \
# --log_dir=/workspace/examples/llm/auto_configurator/auto_conf_logs \
# --run_number=1
# python examples/llm/auto_configurator/auto_config.py \
# --log_dir=/workspace/examples/llm/auto_configurator/auto_conf_logs \
# --run_number=2
# python examples/llm/auto_configurator/auto_config.py \
# --log_dir=/workspace/examples/llm/auto_configurator/auto_conf_logs \
# --run_number=3
# python examples/llm/auto_configurator/auto_config.py \
# --log_dir=/workspace/examples/llm/auto_configurator/auto_conf_logs \
# --get_results
# AFTER_SCRIPT: |
# rm -rf examples/llm/auto_configurator/auto_conf_logs
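# # L2: Megatron GPT supervised finetuning (SFT) tests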
# L2_Megatron_GPT_Finetuning_PP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_Finetuning_PP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py \
# trainer.devices=2 \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=2 \
# +trainer.limit_val_batches=2 \
# trainer.max_steps=3 \
# trainer.precision=16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=/tmp/gpt_finetuning_pp2_megatron \
# model.pipeline_model_parallel_size=2 \
# model.tensor_model_parallel_size=1 \
# model.restore_from_path=/home/TestData/nlp/megatron_gpt/PP2/gpt_pp2_tp1.nemo \
# model.optim.name=fused_adam \
# model.optim.lr=2e-4 \
# model.peft.peft_scheme=null \
# model.data.train_ds.micro_batch_size=1 \
# model.data.train_ds.global_batch_size=4 \
# model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl,/home/TestData/nlp/megatron_sft/trec.jsonl] \
# model.data.train_ds.concat_sampling_probabilities=[0.3,0.7] \
# model.data.train_ds.num_workers=0 \
# model.data.test_ds.micro_batch_size=1 \
# model.data.test_ds.global_batch_size=1 \
# model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.test_ds.names=[quarel] \
# model.data.validation_ds.micro_batch_size=1 \
# model.data.validation_ds.global_batch_size=1 \
# model.data.validation_ds.num_workers=0 \
# model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.validation_ds.names=[quarel]
# python examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py \
# trainer.devices=2 \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=1 \
# +trainer.limit_val_batches=2 \
# trainer.max_steps=3 \
# trainer.precision=16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=/tmp/gpt_finetuning_pp2_megatron \
# model.pipeline_model_parallel_size=2 \
# model.tensor_model_parallel_size=1 \
# model.restore_from_path=/home/TestData/nlp/megatron_gpt/PP2/gpt_pp2_tp1.nemo \
# model.optim.name=fused_adam \
# model.optim.lr=2e-4 \
# model.peft.peft_scheme=null \
# model.data.train_ds.micro_batch_size=1 \
# model.data.train_ds.global_batch_size=4 \
# model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl,/home/TestData/nlp/megatron_sft/trec.jsonl] \
# model.data.train_ds.concat_sampling_probabilities=[0.3,0.7] \
# model.data.train_ds.num_workers=0 \
# model.data.test_ds.micro_batch_size=1 \
# model.data.test_ds.global_batch_size=1 \
# model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.test_ds.names=[quarel] \
# model.data.validation_ds.micro_batch_size=1 \
# model.data.validation_ds.global_batch_size=1 \
# model.data.validation_ds.num_workers=0 \
# model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.validation_ds.names=[quarel]
# L2_Megatron_GPT_Finetuning_StarCoder_PP1:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_Finetuning_StarCoder_PP1') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py \
# trainer.devices=1 \
# trainer.num_nodes=1 \
# trainer.precision=bf16 \
# trainer.max_steps=4 \
# trainer.val_check_interval=4 \
# trainer.enable_checkpointing=False \
# +trainer.limit_val_batches=2 \
# +trainer.limit_test_batches=2 \
# exp_manager.checkpoint_callback_params.save_best_model=False \
# exp_manager.exp_dir=/tmp/gpt_sft_results_starcoder_pp1 \
# model.peft.peft_scheme=none \
# model.optim.name=distributed_fused_adam \
# model.restore_from_path=/home/TestData/nlp/megatron_gpt/starcoder-ci-nemo/megatron_starcoder_tp1_pp1.nemo \
# model.tensor_model_parallel_size=1 \
# model.pipeline_model_parallel_size=1 \
# model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.train_ds.num_workers=0 \
# model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.validation_ds.num_workers=0 \
# model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.test_ds.num_workers=0 \
# model.data.train_ds.concat_sampling_probabilities=[1.0]
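# # L2: Megatron GPT information retrieval tests (reranker and embedding finetuning / generation)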
# L2_Megatron_GPT_Reranker:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_Reranker') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/information_retrieval/megatron_gpt_reranker_finetuning.py \
# exp_manager.exp_dir="/tmp/gpt_reranker_workdir/" \
# model.global_batch_size=4 \
# model.micro_batch_size=4 \
# trainer.devices=1 \
# trainer.num_nodes=1 \
# trainer.max_epochs=null \
# trainer.max_steps=20 \
# trainer.val_check_interval=10 \
# model.restore_from_path="/home/TestData/nlp/megatron_gpt/mcore_45M/megatron_llama.nemo" \
# model.peft.lora_tuning.adapter_dim=8 \
# model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_ir/train.jsonl] \
# model.data.validation_ds.write_embeddings_to_file=True \
# model.data.validation_ds.output_file_path_prefix="/home/TestData/nlp/megatron_ir/working_dir/val_embs" \
# model.data.train_ds.file_names=[/home/TestData/nlp/megatron_ir/train.jsonl]
# L2_Megatron_GPT_Embedding:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_Embedding') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/information_retrieval/megatron_gpt_embedding_finetuning.py \
# exp_manager.exp_dir="/tmp/gpt_embedding_workdir/" \
# model.global_batch_size=4 \
# model.micro_batch_size=4 \
# trainer.devices=1 \
# trainer.num_nodes=1 \
# trainer.max_epochs=null \
# trainer.max_steps=20 \
# trainer.val_check_interval=10 \
# model.restore_from_path="/home/TestData/nlp/megatron_gpt/mcore_45M/megatron_llama.nemo" \
# model.peft.lora_tuning.adapter_dim=8 \
# model.data.validation_ds.query_file_names=[/home/TestData/nlp/megatron_ir/test_query.jsonl] \
# model.data.validation_ds.doc_file_names=[/home/TestData/nlp/megatron_ir/test_doc.jsonl] \
# model.data.validation_ds.write_embeddings_to_file=True \
# model.data.validation_ds.output_file_path_prefix="/tmp/gpt_embedding_workdir/val_embs/" \
# model.data.train_ds.file_names=[/home/TestData/nlp/megatron_ir/train.jsonl]
# python examples/nlp/information_retrieval/megatron_gpt_embedding_generate.py \
# trainer.devices=1 \
# trainer.num_nodes=1 \
# model.restore_from_path="/home/TestData/nlp/megatron_gpt/mcore_45M/megatron_llama.nemo" \
# model.peft.restore_from_path="/tmp/gpt_embedding_workdir/megatron_gpt_peft_lora_tuning/checkpoints/megatron_gpt_peft_lora_tuning.nemo" \
# model.global_batch_size=4 \
# model.micro_batch_size=4 \
# model.peft.lora_tuning.adapter_dim=8 \
# model.data.test_ds.write_embeddings_to_file=True \
# model.data.test_ds.output_file_path_prefix="/tmp/gpt_embedding_workdir/test_embs" \
# model.data.test_ds.query_file_names=[/home/TestData/nlp/megatron_ir/test_query.jsonl] \
# model.data.test_ds.doc_file_names=[/home/TestData/nlp/megatron_ir/test_doc.jsonl]
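# # L2: Megatron GPT PEFT (LoRA) tuning and generation tests across PP/TP/SP layouts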
# L2_Megatron_GPT_PEFT_Lora_PP2_O2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_PEFT_Lora_PP2_O2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py \
# trainer.devices=2 \
# trainer.log_every_n_steps=1 \
# trainer.max_epochs=9999 \
# trainer.max_steps=3 \
# trainer.val_check_interval=3 \
# ++trainer.limit_val_batches=2 \
# trainer.precision=bf16 \
# exp_manager.exp_dir=/tmp/nlp_peft_lora_tuning_pp2 \
# model.pipeline_model_parallel_size=2 \
# model.tensor_model_parallel_size=1 \
# model.restore_from_path=/home/TestData/nlp/megatron_gpt/mcore_45M/megatron_llama.nemo \
# model.megatron_amp_O2=True \
# model.peft.peft_scheme=lora \
# model.answer_only_loss=True \
# model.micro_batch_size=1 \
# model.global_batch_size=1 \
# model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.train_ds.concat_sampling_probabilities=[1.0] \
# model.data.train_ds.num_workers=0 \
# model.data.validation_ds.num_workers=0 \
# model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.validation_ds.names=[quarel]
# python examples/nlp/language_modeling/tuning/megatron_gpt_generate.py \
# model.restore_from_path=/home/TestData/nlp/megatron_gpt/mcore_45M/megatron_llama.nemo \
# model.peft.restore_from_path=/tmp/nlp_peft_lora_tuning_pp2/megatron_gpt_peft_lora_tuning/checkpoints/megatron_gpt_peft_lora_tuning.nemo \
# model.pipeline_model_parallel_size=2 \
# model.tensor_model_parallel_size=1 \
# trainer.devices=2 \
# model.megatron_amp_O2=True \
# model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel_4.jsonl] \
# model.data.test_ds.names=["quarel4"] \
# model.global_batch_size=2 \
# model.micro_batch_size=1 \
# model.data.test_ds.tokens_to_generate=10 \
# model.data.test_ds.write_predictions_to_file=True \
# model.data.test_ds.output_file_path_prefix="/tmp/nlp_peft_lora_tuning_pp2/out" \
# inference.greedy=True \
# inference.repetition_penalty=1.0 \
# inference.outfile_path="/tmp/nlp_peft_lora_tuning_pp2/out.jsonl"
# L2_Megatron_GPT_PEFT_Lora_TP2_O1:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_PEFT_Lora_TP2_O1') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py \
# trainer.devices=2 \
# trainer.log_every_n_steps=1 \
# trainer.max_epochs=9999 \
# trainer.max_steps=3 \
# trainer.val_check_interval=3 \
# ++trainer.limit_val_batches=2 \
# trainer.precision=bf16 \
# exp_manager.exp_dir=/tmp/nlp_peft_lora_tuning_pp2_o1 \
# model.pipeline_model_parallel_size=1 \
# model.tensor_model_parallel_size=2 \
# model.restore_from_path=/home/TestData/nlp/megatron_gpt/mcore_45M/megatron_llama.nemo \
# model.peft.peft_scheme="lora" \
# model.answer_only_loss=True \
# model.micro_batch_size=1 \
# model.global_batch_size=1 \
# model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.train_ds.concat_sampling_probabilities=[1.0] \
# model.data.train_ds.num_workers=0 \
# model.data.validation_ds.num_workers=0 \
# model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.validation_ds.names=[quarel]
# python examples/nlp/language_modeling/tuning/megatron_gpt_generate.py \
# model.restore_from_path=/home/TestData/nlp/megatron_gpt/mcore_45M/megatron_llama.nemo \
# model.peft.restore_from_path=/tmp/nlp_peft_lora_tuning_pp2_o1/megatron_gpt_peft_lora_tuning/checkpoints/megatron_gpt_peft_lora_tuning.nemo \
# model.tensor_model_parallel_size=2 \
# trainer.devices=2 \
# model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel_4.jsonl] \
# model.data.test_ds.names=["quarel4"] \
# model.global_batch_size=2 \
# model.micro_batch_size=1 \
# model.data.test_ds.tokens_to_generate=10 \
# model.data.test_ds.write_predictions_to_file=True \
# model.data.test_ds.output_file_path_prefix="/tmp/nlp_peft_lora_tuning_pp2_o1/out" \
# inference.greedy=True \
# inference.repetition_penalty=1.0 \
# inference.outfile_path="/tmp/nlp_peft_lora_tuning_pp2_o1/out.jsonl"
# L2_Megatron_GPT_PEFT_Lora_TP2SP1:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_PEFT_Lora_TP2SP1') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-2-h100
# SCRIPT: |
# CUDA_DEVICE_MAX_CONNECTIONS=1 python examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py \
# trainer.devices=2 \
# trainer.log_every_n_steps=1 \
# trainer.max_epochs=9999 \
# trainer.max_steps=3 \
# trainer.val_check_interval=3 \
# ++trainer.limit_val_batches=2 \
# trainer.precision=bf16 \
# exp_manager.exp_dir=/tmp/nlp_lora_tuning_tp2_sp1 \
# +model.mcore_gpt=True \
# model.pipeline_model_parallel_size=1 \
# model.tensor_model_parallel_size=2 \
# model.sequence_parallel=True \
# model.megatron_amp_O2=True \
# model.restore_from_path=/home/TestData/nlp/megatron_gpt/mcore_45M/megatron_llama.nemo \
# +model.fp8=True \
# +model.fp8_params=True \
# +model.fp8_hybrid=True \
# +model.fp8_e4m3=False \
# +model.fp8_interval=1 \
# +model.fp8_margin=0 \
# +model.fp8_amax_history_len=32 \
# +model.fp8_amax_compute_algo=max \
# +model.reduce_amax=False \
# +model.ub_tp_comm_overlap=False \
# +model.tp_comm_overlap_ag=False \
# +model.tp_comm_overlap_rs=False \
# +model.tp_comm_overlap_disable_qkv=True \
# +model.attention_backend="unfused" \
# model.peft.peft_scheme="lora" \
# model.peft.lora_tuning.adapter_dim=16 \
# model.peft.lora_tuning.alpha=32 \
# model.peft.lora_tuning.column_init_method="kaiming" \
# +model.peft.lora_tuning.dropout_position="pre" \
# model.peft.lora_tuning.target_modules=["attention"] \
# model.peft.lora_tuning.adapter_dropout=0.1 \
# +model.peft.lora_tuning.a2a_experimental=1 \
# model.answer_only_loss=True \
# model.micro_batch_size=1 \
# model.global_batch_size=1 \
# model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.train_ds.concat_sampling_probabilities=[1.0] \
# model.data.train_ds.num_workers=0 \
# model.data.validation_ds.num_workers=0 \
# model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.validation_ds.names=[quarel]
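# # L2: Megatron GPT evaluation / inference tests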
# L2_Megatron_GPT_Eval:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_Eval') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_gpt_eval.py \
# gpt_model_file=/home/TestData/nlp/megatron_gpt/125M/megatron_gpt.nemo \
# prompts=["How to fix GPU memory? A:"] \
# tensor_model_parallel_size=1 \
# inference.tokens_to_generate=32 \
# trainer.precision=32
# L2_Megatron_GPT_Eval_PP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_Eval_PP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_gpt_eval.py \
# gpt_model_file=/home/TestData/nlp/megatron_gpt/PP2/gpt_pp2_tp1.nemo \
# server=False \
# tensor_model_parallel_size=1 \
# pipeline_model_parallel_size=2 \
# trainer.devices=2 \
# trainer.num_nodes=1 \
# trainer.precision=32
# L2_Megatron_GPT_SFT_Eval_inference_seq_len_greaterThan_training_seq_len:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_GPT_SFT_Eval_inference_seq_len_greaterThan_training_seq_len') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/tuning/megatron_gpt_generate.py \
# model.restore_from_path=/home/TestData/nlp/megatron_gpt_sft/megatron_gpt_rope_sft.nemo \
# model.peft.restore_from_path=null \
# model.data.test_ds.file_names=[/home/TestData/nlp/megatron_gpt_sft/sample.jsonl] \
# model.data.test_ds.names=[test] \
# model.data.test_ds.global_batch_size=1 \
# model.data.test_ds.micro_batch_size=1 \
# model.data.test_ds.tokens_to_generate=30 \
# model.data.test_ds.max_seq_length=6000 \
# model.data.test_ds.write_predictions_to_file=True \
# model.data.test_ds.output_file_path_prefix=examples/nlp/language_modeling/out \
# inference.greedy=True \
# inference.repetition_penalty=1.0 \
# inference.outfile_path=examples/nlp/language_modeling/out.jsonl
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/out.jsonl
# # TODO: Add this test back. Test was failing on CI machines due to HW error
# # - name: L2: Megatron GPT Convert from Megatron-LM checkpoint and Eval
# # when {
# # anyOf {
# # branch main
# # changeRequest target: main
# # }
# # }
# # failFast true
# # - run: |
# # python -m torch.distributed.launch --nproc_per_node=2 \
# # examples/nlp/language_modeling/megatron_lm_ckpt_to_nemo.py \
# # --checkpoint_folder=/home/TestData/nlp/megatron_gpt/data/gpt/iter_0008700 \
# # --checkpoint_name=model_optim_rng.pt \
# # --hparams_file=/home/TestData/nlp/megatron_gpt/data/gpt/iter_0008700/hparams.yaml \
# # --nemo_file_path=examples/nlp/language_modeling/small_gpt.nemo \
# # --model_type=gpt \
# # --pipeline_model_parallel_size=1 \
# # --gpus_per_node=2 \
# #   --tensor_model_parallel_size=2
# # python examples/nlp/language_modeling/megatron_gpt_eval.py \
# # --gpt_model_file=examples/nlp/language_modeling/small_gpt.nemo \
# # --tokens_to_generate=32 \
# # --tensor_model_parallel_size=2 \
# # --prompt=This is a test.
# # rm examples/nlp/language_modeling/small_gpt.nemo
# # L2: Megatron Change Partitions
# L2_Megatron_Change_Partitions_Reduce_TP_Num_Partitions_-2_to_1-_and_PP_Num_Partitions_-1_to_2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_Change_Partitions_Reduce_TP_Num_Partitions_-2_to_1-_and_PP_Num_Partitions_-1_to_2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_change_num_partitions.py \
# --model_file /home/TestData/nlp/megatron_gpt/TP2/megatron_gpt_tp2.nemo \
# --target_file /home/TestData/nlp/megatron_gpt/TP2-Temp/test-reduce.nemo \
# --tensor_model_parallel_size 2 \
# --target_tensor_model_parallel_size 1 \
# --pipeline_model_parallel_size 1 \
# --target_pipeline_model_parallel_size 2
# AFTER_SCRIPT: |
# rm /home/TestData/nlp/megatron_gpt/TP2-Temp/test-reduce.nemo
# L2_Megatron_Change_Partitions_Increase_TP_Num_Partitions_-2_to_4-_and_PP_Num_Partitions_-1_to_2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_Change_Partitions_Increase_TP_Num_Partitions_-2_to_4-_and_PP_Num_Partitions_-1_to_2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_change_num_partitions.py \
# --model_file /home/TestData/nlp/megatron_gpt/TP2/megatron_gpt_tp2.nemo \
# --target_file /home/TestData/nlp/megatron_gpt/TP2-Temp/test-increase.nemo \
# --tensor_model_parallel_size 2 \
# --target_tensor_model_parallel_size 4 \
# --pipeline_model_parallel_size 1 \
# --target_pipeline_model_parallel_size 1
# AFTER_SCRIPT: |
# rm /home/TestData/nlp/megatron_gpt/TP2-Temp/test-increase.nemo
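# # L2: Megatron T5 and UL2 pretraining and resume tests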
# L2_Megatron_Core_T5_Pretraining_and_Resume_Training_TP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_Core_T5_Pretraining_and_Resume_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_t5_pretraining.py \
# trainer.devices=2 \
# trainer.log_every_n_steps=1 \
# trainer.max_epochs=null \
# trainer.max_steps=10 \
# trainer.val_check_interval=10 \
# trainer.accumulate_grad_batches=1 \
# trainer.precision=bf16 \
# model.megatron_amp_O2=True \
# exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
# model.mcore_t5=True \
# model.transformer_engine=True \
# model.tensor_model_parallel_size=2 \
# model.micro_batch_size=4 \
# model.global_batch_size=4 \
# model.seq_length=128 \
# model.encoder.num_layers=4 \
# model.encoder.hidden_size=64 \
# model.encoder.num_attention_heads=8 \
# model.decoder.num_layers=4 \
# model.decoder.hidden_size=64 \
# model.decoder.num_attention_heads=8 \
# model.encoder.transformer_block_type="pre_ln" \
# model.decoder.transformer_block_type="pre_ln" \
# model.data.data_prefix=[.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src,.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/t5_index_mappings \
# model.data.data_impl=text_mmap \
# +model.data.data_impl_kwargs.newline_int=10 \
# +model.data.data_impl_kwargs.header_lines=0 \
# +model.data.data_impl_kwargs.workers=null \
# +model.data.data_impl_kwargs.sort_dataset_paths=False
# python examples/nlp/language_modeling/megatron_t5_pretraining.py \
# trainer.devices=2 \
# trainer.log_every_n_steps=1 \
# trainer.max_epochs=null \
# trainer.max_steps=10 \
# trainer.val_check_interval=10 \
# trainer.accumulate_grad_batches=1 \
# trainer.precision=bf16 \
# model.megatron_amp_O2=True \
# exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
# exp_manager.resume_if_exists=True \
# model.mcore_t5=True \
# model.transformer_engine=True \
# model.tensor_model_parallel_size=2 \
# model.micro_batch_size=4 \
# model.global_batch_size=4 \
# model.seq_length=128 \
# model.encoder.num_layers=4 \
# model.encoder.hidden_size=64 \
# model.encoder.num_attention_heads=8 \
# model.decoder.num_layers=4 \
# model.decoder.hidden_size=64 \
# model.decoder.num_attention_heads=8 \
# model.encoder.transformer_block_type="pre_ln" \
# model.decoder.transformer_block_type="pre_ln" \
# model.data.data_prefix=[.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src,.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/t5_index_mappings \
# model.data.data_impl=text_mmap \
# +model.data.data_impl_kwargs.newline_int=10 \
# +model.data.data_impl_kwargs.header_lines=0 \
# +model.data.data_impl_kwargs.workers=null \
# +model.data.data_impl_kwargs.sort_dataset_paths=False
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/t5_pretrain_results
# rm -rf examples/nlp/language_modeling/t5_index_mappings
# L2_Megatron_T5_with_ALiBi_Pretraining_and_Resume_Training_TP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_T5_with_ALiBi_Pretraining_and_Resume_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_t5_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=10 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=10 \
# trainer.precision=16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
# model.tensor_model_parallel_size=2 \
# model.seq_length=128 \
# model.encoder.num_layers=4 \
# model.encoder.hidden_size=64 \
# model.encoder.num_attention_heads=8 \
# model.encoder.activation=swiglu \
# model.encoder.masked_softmax_fusion=False \
# model.encoder.bias_activation_fusion=False \
# model.encoder.activations_checkpoint_method=block \
# model.encoder.activations_checkpoint_num_layers=1 \
# model.encoder.position_embedding_type=alibi \
# model.decoder.num_layers=2 \
# model.decoder.hidden_size=64 \
# model.decoder.num_attention_heads=8 \
# model.decoder.activation=swiglu \
# model.decoder.masked_softmax_fusion=False \
# model.decoder.bias_activation_fusion=False \
# model.decoder.activations_checkpoint_method=block \
# model.decoder.activations_checkpoint_num_layers=1 \
# model.encoder.transformer_block_type=pre_ln \
# model.decoder.transformer_block_type=pre_ln \
# model.data.data_prefix=[.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src,.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/t5_index_mappings \
# model.data.data_impl=text_mmap \
# +model.data.data_impl_kwargs.newline_int=10 \
# +model.data.data_impl_kwargs.header_lines=0 \
# +model.data.data_impl_kwargs.workers=null \
# +model.data.data_impl_kwargs.sort_dataset_paths=False \
# model.share_token_embeddings=False \
# model.share_decoder_tokens_head_embeddings=False
# python examples/nlp/language_modeling/megatron_t5_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=1 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=10 \
# trainer.precision=16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
# exp_manager.resume_if_exists=True \
# model.tensor_model_parallel_size=2 \
# model.seq_length=128 \
# model.encoder.num_layers=4 \
# model.encoder.hidden_size=64 \
# model.encoder.num_attention_heads=8 \
# model.encoder.activation=swiglu \
# model.encoder.masked_softmax_fusion=False \
# model.encoder.bias_activation_fusion=False \
# model.encoder.activations_checkpoint_method=block \
# model.encoder.activations_checkpoint_num_layers=1 \
# model.encoder.position_embedding_type=alibi \
# model.decoder.num_layers=2 \
# model.decoder.hidden_size=64 \
# model.decoder.num_attention_heads=8 \
# model.decoder.activation=swiglu \
# model.decoder.masked_softmax_fusion=False \
# model.decoder.bias_activation_fusion=False \
# model.decoder.activations_checkpoint_method=block \
# model.decoder.activations_checkpoint_num_layers=1 \
# model.encoder.transformer_block_type=pre_ln \
# model.decoder.transformer_block_type=pre_ln \
# model.data.data_prefix=[.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src,.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/t5_index_mappings \
# model.data.data_impl=text_mmap \
# +model.data.data_impl_kwargs.newline_int=10 \
# +model.data.data_impl_kwargs.header_lines=0 \
# +model.data.data_impl_kwargs.workers=null \
# +model.data.data_impl_kwargs.sort_dataset_paths=False \
# model.share_token_embeddings=False \
# model.share_decoder_tokens_head_embeddings=False
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/t5_pretrain_results
# rm -rf examples/nlp/language_modeling/t5_index_mappings
# L2_Megatron_T5_with_KERPLE_Pretraining_and_Resume_Training_TP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_T5_with_KERPLE_Pretraining_and_Resume_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_t5_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=10 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=10 \
# trainer.precision=16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
# model.tensor_model_parallel_size=2 \
# model.seq_length=128 \
# model.encoder.num_layers=4 \
# model.encoder.hidden_size=64 \
# model.encoder.num_attention_heads=8 \
# model.encoder.activation=swiglu \
# model.encoder.masked_softmax_fusion=False \
# model.encoder.bias_activation_fusion=False \
# model.encoder.activations_checkpoint_method=block \
# model.encoder.activations_checkpoint_num_layers=1 \
# model.encoder.position_embedding_type=kerple \
# model.decoder.num_layers=2 \
# model.decoder.hidden_size=64 \
# model.decoder.num_attention_heads=8 \
# model.decoder.activation=swiglu \
# model.decoder.masked_softmax_fusion=False \
# model.decoder.bias_activation_fusion=False \
# model.decoder.activations_checkpoint_method=block \
# model.decoder.activations_checkpoint_num_layers=1 \
# model.encoder.transformer_block_type=pre_ln \
# model.decoder.transformer_block_type=pre_ln \
# model.data.data_prefix=[.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src,.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/t5_index_mappings \
# model.data.data_impl=text_mmap \
# +model.data.data_impl_kwargs.newline_int=10 \
# +model.data.data_impl_kwargs.header_lines=0 \
# +model.data.data_impl_kwargs.workers=null \
# +model.data.data_impl_kwargs.sort_dataset_paths=False \
# model.share_token_embeddings=False \
# model.share_decoder_tokens_head_embeddings=False
# python examples/nlp/language_modeling/megatron_t5_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=1 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=10 \
# trainer.precision=16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
# exp_manager.resume_if_exists=True \
# model.tensor_model_parallel_size=2 \
# model.seq_length=128 \
# model.encoder.num_layers=4 \
# model.encoder.hidden_size=64 \
# model.encoder.num_attention_heads=8 \
# model.encoder.activation=swiglu \
# model.encoder.masked_softmax_fusion=False \
# model.encoder.bias_activation_fusion=False \
# model.encoder.activations_checkpoint_method=block \
# model.encoder.activations_checkpoint_num_layers=1 \
# model.encoder.position_embedding_type=kerple \
# model.decoder.num_layers=2 \
# model.decoder.hidden_size=64 \
# model.decoder.num_attention_heads=8 \
# model.decoder.activation=swiglu \
# model.decoder.masked_softmax_fusion=False \
# model.decoder.bias_activation_fusion=False \
# model.decoder.activations_checkpoint_method=block \
# model.decoder.activations_checkpoint_num_layers=1 \
# model.encoder.transformer_block_type=pre_ln \
# model.decoder.transformer_block_type=pre_ln \
# model.data.data_prefix=[.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.src,.5,/home/TestData/nlp/nmt/toy_data/wmt14-de-en.ref] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/t5_index_mappings \
# model.data.data_impl=text_mmap \
# +model.data.data_impl_kwargs.newline_int=10 \
# +model.data.data_impl_kwargs.header_lines=0 \
# +model.data.data_impl_kwargs.workers=null \
# +model.data.data_impl_kwargs.sort_dataset_paths=False \
# model.share_token_embeddings=False \
# model.share_decoder_tokens_head_embeddings=False
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/t5_pretrain_results
# rm -rf examples/nlp/language_modeling/t5_index_mappings
# OPTIONAL_L2_Megatron_T5_Pretraining_and_Resume_Training_PP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L2_Megatron_T5_Pretraining_and_Resume_Training_PP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_t5_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=10 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=10 \
# trainer.precision=16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
# model.pipeline_model_parallel_size=2 \
# model.pipeline_model_parallel_split_rank=1 \
# model.seq_length=256 \
# model.encoder.num_layers=4 \
# model.decoder.num_layers=1 \
# model.encoder.hidden_size=64 \
# model.decoder.hidden_size=64 \
# model.encoder.num_attention_heads=8 \
# model.decoder.num_attention_heads=8 \
# model.decoder.ffn_hidden_size=2048 \
# model.encoder.activation=gelu \
# model.encoder.activations_checkpoint_method=block \
# model.encoder.activations_checkpoint_num_layers=1 \
# model.encoder.transformer_block_type=pre_ln \
# model.decoder.transformer_block_type=post_ln \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document,.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/t5_index_mappings
# python examples/nlp/language_modeling/megatron_t5_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=1 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=10 \
# trainer.precision=16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
# exp_manager.resume_if_exists=True \
# model.pipeline_model_parallel_size=2 \
# model.pipeline_model_parallel_split_rank=1 \
# model.seq_length=256 \
# model.encoder.num_layers=4 \
# model.decoder.num_layers=1 \
# model.encoder.hidden_size=64 \
# model.decoder.hidden_size=64 \
# model.encoder.num_attention_heads=8 \
# model.decoder.num_attention_heads=8 \
# model.decoder.ffn_hidden_size=2048 \
# model.encoder.activation=gelu \
# model.encoder.activations_checkpoint_method=block \
# model.encoder.activations_checkpoint_num_layers=1 \
# model.encoder.transformer_block_type=pre_ln \
# model.decoder.transformer_block_type=post_ln \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document,.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/t5_index_mappings
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/t5_pretrain_results
# rm -rf examples/nlp/language_modeling/t5_index_mappings
# IS_OPTIONAL: true
# L2_Megatron_T5_w_Mixture_of_Expert_Pretraining:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_T5_w_Mixture_of_Expert_Pretraining') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_t5_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=10 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=10 \
# trainer.precision=16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
# model.pipeline_model_parallel_split_rank=0 \
# model.seq_length=256 \
# model.encoder.num_layers=4 \
# model.decoder.num_layers=1 \
# model.encoder.num_moe_experts=4 \
# model.decoder.num_moe_experts=4 \
# model.encoder.moe_frequency=3 \
# model.decoder.moe_frequency=1 \
# model.encoder.hidden_size=64 \
# model.decoder.hidden_size=64 \
# model.encoder.num_attention_heads=8 \
# model.decoder.num_attention_heads=8 \
# model.decoder.ffn_hidden_size=2048 \
# model.encoder.activation=gelu \
# model.encoder.activations_checkpoint_method=block \
# model.encoder.activations_checkpoint_num_layers=1 \
# model.encoder.transformer_block_type=pre_ln \
# model.decoder.transformer_block_type=post_ln \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document,.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/t5_index_mappings
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/t5_pretrain_results
# rm -rf examples/nlp/language_modeling/t5_index_mappings
# L2_Megatron_UL2_Pretraining_and_Resume_Training_TP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_UL2_Pretraining_and_Resume_Training_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_t5_pretraining.py -cn megatron_ul2_config \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=10 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=10 \
# trainer.precision=16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
# model.tensor_model_parallel_size=2 \
# model.seq_length=128 \
# model.encoder.num_layers=4 \
# model.encoder.hidden_size=64 \
# model.encoder.num_attention_heads=8 \
# model.encoder.activation=swiglu \
# model.encoder.bias_activation_fusion=False \
# model.encoder.activations_checkpoint_method=block \
# model.encoder.activations_checkpoint_num_layers=1 \
# model.encoder.transformer_block_type=normformer \
# model.encoder.headscale=True \
# model.decoder.num_layers=4 \
# model.decoder.hidden_size=64 \
# model.decoder.num_attention_heads=8 \
# model.decoder.activation=geglu \
# model.decoder.bias_activation_fusion=False \
# model.decoder.activations_checkpoint_method=block \
# model.decoder.activations_checkpoint_num_layers=1 \
# model.decoder.transformer_block_type=normformer \
# model.decoder.headscale=False \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document,.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/t5_index_mappings
# python examples/nlp/language_modeling/megatron_t5_pretraining.py \
# trainer.devices=2 \
# trainer.accelerator=gpu \
# trainer.log_every_n_steps=1 \
# trainer.val_check_interval=1 \
# trainer.limit_val_batches=2 \
# trainer.accumulate_grad_batches=1 \
# trainer.max_steps=10 \
# trainer.precision=16 \
# trainer.gradient_clip_val=1.0 \
# exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
# exp_manager.resume_if_exists=True \
# model.tensor_model_parallel_size=2 \
# model.seq_length=128 \
# model.encoder.num_layers=4 \
# model.encoder.hidden_size=64 \
# model.encoder.num_attention_heads=8 \
# model.encoder.activation=swiglu \
# model.encoder.bias_activation_fusion=False \
# model.encoder.activations_checkpoint_method=block \
# model.encoder.activations_checkpoint_num_layers=1 \
# model.encoder.transformer_block_type=normformer \
# model.encoder.headscale=True \
# model.decoder.num_layers=4 \
# model.decoder.hidden_size=64 \
# model.decoder.num_attention_heads=8 \
# model.decoder.activation=geglu \
# model.decoder.bias_activation_fusion=False \
# model.decoder.activations_checkpoint_method=block \
# model.decoder.activations_checkpoint_num_layers=1 \
# model.decoder.transformer_block_type=normformer \
# model.decoder.headscale=False \
# model.data.data_prefix=[.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document,.5,/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document] \
# model.data.index_mapping_dir=examples/nlp/language_modeling/t5_index_mappings
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/t5_pretrain_results
# rm -rf examples/nlp/language_modeling/t5_index_mappings
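# # L2: Megatron Core T5 evaluation and PEFT (LoRA) tests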
# L2_Megatron_Core_T5_Eval:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_Core_T5_Eval') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_t5_eval.py \
# --model_file /home/TestData/nlp/megatron_t5/220m/megatron_mcore_t5_220m_padding_attnmasktype.nemo \
# --prompt "How do I fix my GPU memory issue? I am seeing <mask> out of memory." \
# --tensor_model_parallel_size 1
# L2_Megatron_Core_T5_PEFT_Lora_TP2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_Core_T5_PEFT_Lora_TP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/tuning/megatron_t5_finetuning.py \
# trainer.devices=2 \
# trainer.log_every_n_steps=1 \
# trainer.max_epochs=9999 \
# trainer.max_steps=3 \
# trainer.val_check_interval=3 \
# ++trainer.limit_val_batches=2 \
# trainer.precision=16 \
# exp_manager.exp_dir=/tmp/nlp_mcore_t5_lora_tuning_tp2 \
# model.pipeline_model_parallel_size=1 \
# model.tensor_model_parallel_size=2 \
# model.restore_from_path=/home/TestData/nlp/megatron_t5/220m/megatron_mcore_t5_220m_padding_attnmasktype.nemo \
# model.peft.peft_scheme=lora \
# model.answer_only_loss=True \
# model.micro_batch_size=1 \
# model.global_batch_size=1 \
# model.data.train_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.train_ds.concat_sampling_probabilities=[1.0] \
# model.data.train_ds.num_workers=0 \
# model.data.validation_ds.num_workers=0 \
# model.data.validation_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel.jsonl] \
# model.data.validation_ds.names=[quarel]
# python examples/nlp/language_modeling/tuning/megatron_t5_generate.py \
# model.restore_from_path=/home/TestData/nlp/megatron_t5/220m/megatron_mcore_t5_220m_padding_attnmasktype.nemo \
# model.peft.restore_from_path=/tmp/nlp_mcore_t5_lora_tuning_tp2/megatron_t5_peft_lora_tuning/checkpoints/megatron_t5_peft_lora_tuning.nemo \
# model.peft.restore_from_ckpt_name=null \
# model.peft.restore_from_hparams_path=null \
# model.tensor_model_parallel_size=2 \
# trainer.devices=2 \
# model.data.test_ds.file_names=[/home/TestData/nlp/megatron_sft/quarel_4.jsonl] \
# model.data.test_ds.names=[quarel4] \
# model.global_batch_size=1 \
# model.micro_batch_size=1 \
# model.data.test_ds.tokens_to_generate=10 \
# model.data.test_ds.write_predictions_to_file=True \
# model.data.test_ds.output_file_path_prefix=/tmp/nlp_mcore_t5_lora_tuning_tp2/out \
# inference.greedy=True \
# inference.repetition_penalty=1.0 \
# inference.outfile_path=/tmp/nlp_mcore_t5_lora_tuning_tp2/out.jsonl
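# # L2: Hugging Face Transformer tests - PEFT, SFT, and pretraining for VLM and LLM models (single-GPU, DDP, FSDP2, 4-bit, and TE-accelerated variants)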
# L2_VLM_HF_Transformer_PEFT:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_VLM_HF_Transformer_PEFT') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/vlm/hf/peft_hf.py --model /home/TestData/vlm/qwen2-2b/ --max-steps 3
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_VLM_HF_Transformer_PEFT_FSDP:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_VLM_HF_Transformer_PEFT_FSDP') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/vlm/hf/peft_hf.py --model /home/TestData/vlm/qwen2-2b/ --max-steps 3 --strategy fsdp --devices 2
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_VLM_HF_Transformer_PEFT_4bit:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_VLM_HF_Transformer_PEFT_4bit') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/vlm/hf/peft_hf.py --model /home/TestData/vlm/qwen2-2b/ --max-steps 3 --use-4bit
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_VLM_HF_Transformer_SFT_FSDP2:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_VLM_HF_Transformer_SFT_FSDP2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/vlm/hf/sft_fsdp2.py --model /home/TestData/vlm/qwen2-2b/ --max-steps 3
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_PEFT:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_PEFT') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/peft_hf.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_PEFT_nemorun:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_PEFT_nemorun') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/peft_nemorun.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10 --disable-ckpt
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_PEFT_2gpu:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_PEFT_2gpu') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/peft_hf.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10 --devices 2 --strategy ddp --disable-ckpt
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_PEFT_2gpu_nemorun:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_PEFT_2gpu_nemorun') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/peft_nemorun.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10 --devices 2 --strategy ddp --disable-ckpt
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_SFT_2gpu:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_SFT_2gpu') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/sft.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10 --devices 2 --strategy ddp
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_SFT_FSDP2_2gpu:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_SFT_FSDP2_2gpu') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/sft_fsdp2.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10 --devices 2
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_PT_2gpu:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_PT_2gpu') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/pretrain.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10 --devices 2 --strategy ddp
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_SFT_2gpu_nemorun:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_SFT_2gpu_nemorun') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/sft_nemorun.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10 --devices 2 --strategy ddp
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_SFT_2gpu_nemorun_fsdp2:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_SFT_2gpu_nemorun_fsdp2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/sft_nemorun_fsdp2.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10 --devices 2
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_PT_2gpu_nemorun:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_PT_2gpu_nemorun') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/pretrain_nemorun.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10 --devices 2 --strategy ddp
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_PT:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_PT') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/pretrain.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_PT_nemorun:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_PT_nemorun') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/pretrain_nemorun.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_SFT:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_SFT') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/sft.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_SFT_nemorun:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_SFT_nemorun') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/sft_nemorun.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --max-steps 10
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_SFT_TE_Acceleration:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_SFT_TE_Acceleration') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/sft.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --model-accelerator te --max-steps 10
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# L2_HF_Transformer_PT_TE_Acceleration:
# needs: [ cicd-test-container-setup ]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_HF_Transformer_PT_TE_Acceleration') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 python tests/collections/llm/hf/pretrain.py --model /home/TestData/nlp/hf_gemma/hf_gemma_2b --model-accelerator te --max-steps 10
# AFTER_SCRIPT: |
# rm -rf nemo_experiments
# # L2: Megatron Mock Data Generation
# L2_Megatron_Mock_Data_Generation_MockGPTDataset:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_Mock_Data_Generation_MockGPTDataset') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
# trainer.max_steps=10 \
# trainer.limit_val_batches=7 \
# trainer.val_check_interval=10 \
# exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
# model.mcore_gpt=True \
# model.data.data_impl=mock \
# model.data.data_prefix=[]
# L2_Megatron_Mock_Data_Generation_MockT5Dataset:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Megatron_Mock_Data_Generation_MockT5Dataset') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/nlp/language_modeling/megatron_t5_pretraining.py \
# trainer.max_steps=10 \
# trainer.limit_val_batches=3 \
# trainer.val_check_interval=10 \
# exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
# model.data.data_impl=mock \
# model.data.data_prefix=[]
# AFTER_SCRIPT: |
# rm -rf examples/nlp/language_modeling/t5_pretrain_results
# # L2: TTS Fast dev runs 1
# L2_TTS_Fast_dev_runs_1_Tacotron_2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_TTS_Fast_dev_runs_1_Tacotron_2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python examples/tts/tacotron2.py \
# train_dataset=/home/TestData/an4_dataset/an4_train.json \
# validation_datasets=/home/TestData/an4_dataset/an4_val.json \
# trainer.devices=1 \
# trainer.accelerator="gpu" \
# +trainer.limit_train_batches=1 +trainer.limit_val_batches=1 trainer.max_epochs=1 \
# trainer.strategy=auto \
# model.decoder.decoder_rnn_dim=256 \
# model.decoder.attention_rnn_dim=1024 \
# model.decoder.prenet_dim=128 \
# model.postnet.postnet_n_convolutions=3 \
# model.train_ds.dataloader_params.batch_size=4 \
# model.train_ds.dataloader_params.num_workers=0 \
# model.validation_ds.dataloader_params.batch_size=4 \
# model.validation_ds.dataloader_params.num_workers=0 \
# ~model.text_normalizer \
# ~model.text_normalizer_call_kwargs \
# ~trainer.check_val_every_n_epoch
# L2_TTS_Fast_dev_runs_1_WaveGlow:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_TTS_Fast_dev_runs_1_WaveGlow') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/tts/waveglow.py \
# train_dataset=/home/TestData/an4_dataset/an4_train.json \
# validation_datasets=/home/TestData/an4_dataset/an4_val.json \
# trainer.devices="[0]" \
# +trainer.limit_train_batches=1 +trainer.limit_val_batches=1 trainer.max_epochs=1 \
# trainer.strategy=auto \
# model.train_ds.dataloader_params.batch_size=4 \
# model.train_ds.dataloader_params.num_workers=0 \
# model.validation_ds.dataloader_params.batch_size=4 \
# model.validation_ds.dataloader_params.num_workers=0 \
# model.waveglow.n_flows=4 \
# model.waveglow.n_wn_layers=2 \
# model.waveglow.n_wn_channels=32 \
# ~trainer.check_val_every_n_epoch
# L2_TTS_Fast_dev_runs_1_FastPitch:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_TTS_Fast_dev_runs_1_FastPitch') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/tts/fastpitch.py \
# --config-name fastpitch_align_v1.05 \
# train_dataset=/home/TestData/an4_dataset/an4_train.json \
# validation_datasets=/home/TestData/an4_dataset/an4_val.json \
# sup_data_path=/home/TestData/an4_dataset/beta_priors \
# trainer.devices="[0]" \
# +trainer.limit_train_batches=1 \
# +trainer.limit_val_batches=1 \
# trainer.max_epochs=1 \
# trainer.strategy=auto \
# model.pitch_mean=212.35873413085938 \
# model.pitch_std=68.52806091308594 \
# model.train_ds.dataloader_params.batch_size=4 \
# model.train_ds.dataloader_params.num_workers=0 \
# model.validation_ds.dataloader_params.batch_size=4 \
# model.validation_ds.dataloader_params.num_workers=0 \
# model.symbols_embedding_dim=64 \
# model.input_fft.d_inner=384 \
# model.input_fft.n_layer=2 \
# model.output_fft.d_inner=384 \
# model.output_fft.n_layer=2 \
# ~trainer.check_val_every_n_epoch \
# ~model.text_normalizer \
# ~model.text_normalizer_call_kwargs
# # OPTIONAL_L2_TTS_Fast_dev_runs_1_RADTTS:
# # needs: [cicd-test-container-setup]
# # runs-on: self-hosted-azure
# # timeout-minutes: 10
# # container:
# # image: nemoci.azurecr.io/nemo_container:${{ github.run_id }}
# # options:
# # # --user 0:128
# # --device=/dev/nvidia0
# # --gpus all
# # --shm-size=8g
# # --env TRANSFORMERS_OFFLINE=0
# # --env HYDRA_FULL_ERROR=1
# # --volume /mnt/datadrive/TestData:/home/TestData
# # steps:
# # - name: Checkout repository
# # uses: actions/checkout@v4
# # - run: |
# # python examples/tts/radtts.py \
# # train_dataset=/home/TestData/an4_dataset/an4_train.json \
# # validation_datasets=/home/TestData/an4_dataset/an4_val.json \
# # sup_data_path=/home/TestData/an4_dataset/radtts_beta_priors \
# # trainer.devices="[0]" \
# # +trainer.limit_train_batches=1 \
# # +trainer.limit_val_batches=1 \
# # trainer.max_epochs=1 \
# # trainer.strategy=auto \
# # model.pitch_mean=212.35873413085938 \
# # model.pitch_std=68.52806091308594 \
# # model.train_ds.dataloader_params.batch_size=4 \
# # model.train_ds.dataloader_params.num_workers=0 \
# # model.validation_ds.dataloader_params.batch_size=4 \
# # model.validation_ds.dataloader_params.num_workers=0 \
# # export_dir=/home/TestData/radtts_test \
# # model.optim.lr=0.0001 \
# # model.modelConfig.decoder_use_partial_padding=True \
# # ~trainer.check_val_every_n_epoch \
# # ~model.text_normalizer \
# # ~model.text_normalizer_call_kwargs
# # #- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
# # # if: "failure()"
# L2_TTS_Fast_dev_runs_1_Hifigan:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_TTS_Fast_dev_runs_1_Hifigan') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python examples/tts/hifigan.py \
# train_dataset=/home/TestData/an4_dataset/an4_train.json \
# validation_datasets=/home/TestData/an4_dataset/an4_val.json \
# trainer.devices="[0]" \
# +trainer.limit_train_batches=1 \
# +trainer.limit_val_batches=1 \
# +trainer.max_epochs=1 \
# trainer.strategy=auto \
# model.train_ds.dataloader_params.batch_size=4 \
# model.train_ds.dataloader_params.num_workers=0 \
# model.validation_ds.dataloader_params.batch_size=4 \
# model.validation_ds.dataloader_params.num_workers=0 \
# model.generator.upsample_initial_channel=64 \
# +model.debug=true \
# ~trainer.check_val_every_n_epoch
# # L2: NeRF
# # L2_NeRF_DreamFusion:
# # needs: [cicd-test-container-setup]
# # runs-on: self-hosted-azure
# # container:
# # image: nemoci.azurecr.io/nemo_container:${{ github.run_id }}
# # options:
# # # --user 0:128
# # --device=/dev/nvidia0
# # --gpus all
# # --shm-size=8g
# # --env TRANSFORMERS_OFFLINE=0
# # --env HYDRA_FULL_ERROR=1
# # --volume /mnt/datadrive/TestData:/home/TestData
# # steps:
# # - name: Checkout repository
# # uses: actions/checkout@v4
# # - run: |
# # python examples/multimodal/text_to_image/nerf/main.py \
# # trainer.num_nodes=1 \
# # trainer.devices="[0]" \
# # trainer.max_steps=1000 \
# # model.prompt="a DSLR photo of a delicious hamburger" \
# # exp_manager.exp_dir=examples/multimodal/text_to_image/nerf/dreamfusion_results
# #
# # rm -rf examples/multimodal/text_to_image/nerf/dreamfusion_results
# # - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
# # if: "failure()"
# Speech_Checkpoints_tests:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'Speech_Checkpoints_tests') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# TIMEOUT: 20
# SCRIPT: |
# CUDA_VISIBLE_DEVICES=0 python examples/asr/speech_to_text_eval.py \
# pretrained_name=QuartzNet15x5Base-En \
# dataset_manifest=/home/TestData/librispeech/librivox-dev-other.json \
# batch_size=64 \
# tolerance=0.1012
# AFTER_SCRIPT: |
# rm -f examples/asr/evaluation_transcripts.json
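# # L2: Stable Diffusion Training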
# L2_Stable_Diffusion_Training:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_Stable_Diffusion_Training') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# rm -rf examples/multimodal/text_to_image/sd_train_results
# python examples/multimodal/text_to_image/stable_diffusion/sd_train.py \
# trainer.devices=1 \
# trainer.max_steps=3 \
# +trainer.val_check_interval=10 \
# trainer.limit_val_batches=2 \
# trainer.gradient_clip_val=0 \
# exp_manager.exp_dir=examples/multimodal/text_to_image/sd_train_results \
# exp_manager.create_checkpoint_callback=False \
# exp_manager.resume_if_exists=False \
# model.resume_from_checkpoint=null \
# model.precision=16 \
# model.micro_batch_size=1 \
# model.global_batch_size=1 \
# model.first_stage_key=moments \
# model.cond_stage_key=encoded \
# +model.load_vae=False \
# +model.load_unet=False \
# +model.load_encoder=False \
# model.parameterization=v \
# model.load_only_unet=False \
# model.text_embedding_dropout_rate=0.0 \
# model.inductor=True \
# model.inductor_cudagraphs=False \
# model.capture_cudagraph_iters=15 \
# +model.unet_config.num_head_channels=64 \
# +model.unet_config.use_linear_in_transformer=True \
# model.unet_config.context_dim=1024 \
# model.unet_config.use_flash_attention=null \
# model.unet_config.resblock_gn_groups=16 \
# model.unet_config.unet_precision=fp16 \
# +model.unet_config.timesteps=1000 \
# model.optim.name=megatron_fused_adam \
# +model.optim.capturable=True \
# +model.optim.master_weights=True \
# model.optim.weight_decay=0.01 \
# model.first_stage_config.from_pretrained=null \
# model.data.num_workers=16 \
# model.data.synthetic_data=True
# AFTER_SCRIPT: |
# rm -rf examples/multimodal/text_to_image/sd_train_results
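# # L2: NeMo 2.0 tests (pretraining, finetuning, PEFT, quantization, export)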
# L2_NeMo_2_GPT_Pretraining_no_transformer_engine:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_Pretraining_no_transformer_engine') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# pip uninstall -y apex ## TODO: remove when apex is no longer a dependency
# pip uninstall -y transformer_engine
# python tests/collections/llm/megatron_gpt_pretraining.py \
# --devices=2 \
# --max-steps=3 \
# --experiment-dir=tests/collections/llm/gpt_pretrain_results \
# --vocab-path=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# --merges-path=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# --data-path=/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document \
# --index-mapping-dir=tests/collections/llm/gpt_index_mappings \
# --no-masked-softmax-fusion
# python tests/collections/llm/megatron_gpt_pretraining.py \
# --devices=2 \
# --max-steps=6 \
# --experiment-dir=tests/collections/llm/gpt_pretrain_results \
# --vocab-path=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# --merges-path=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# --data-path=/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document \
# --index-mapping-dir=tests/collections/llm/gpt_index_mappings \
# --no-masked-softmax-fusion
# AFTER_SCRIPT: |
# rm -rf tests/collections/llm/gpt_pretrain_results
# rm -rf tests/collections/llm/gpt_index_mappings
# L2_NeMo_2_llama3_pretraining_recipe:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_llama3_pretraining_recipe') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/llama3_pretraining.py \
# --seq-length 1024 \
# --devices=2 \
# --max-steps=6 \
# --early-stop=3 \
# --experiment-dir=/tmp/llm_tests/llama_pretrain_results \
# --data-path=/home/TestData/nlp/megatron_llama/data/rp2_sample_sentencepiece_preproc_text_document \
# --tokenizer-path=/home/TestData/nlp/megatron_llama/tokenizer.model \
# --index-mapping-dir=/tmp/llm_tests/llama_index_mappings \
# python tests/collections/llm/llama3_pretraining.py \
# --seq-length 1024 \
# --devices=2 \
# --max-steps=6 \
# --experiment-dir=/tmp/llm_tests/llama_pretrain_results \
# --data-path=/home/TestData/nlp/megatron_llama/data/rp2_sample_sentencepiece_preproc_text_document \
# --tokenizer-path=/home/TestData/nlp/megatron_llama/tokenizer.model \
# --index-mapping-dir=/tmp/llm_tests/llama_index_mappings \
# --cp 1 --tp 2 --sp 1
# L2_NeMo_2_llama3_fault_tolerance_plugin:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_llama3_fault_tolerance_plugin') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# mkdir -p /tmp/llm_tests/llama_pretrain_results
# export FAULT_TOL_CFG_PATH="/tmp/llm_tests/llama_pretrain_results/sample_job_ft_cfg.yml"; \
# export FAULT_TOL_FINISHED_FLAG_FILE="/tmp/llm_tests/llama_pretrain_results/sample_job_finished_flag"; \
# python tests/collections/llm/test_fault_nvrx.py \
# --devices=2 \
# --crash-step=16 \
# --experiment-dir=/tmp/llm_tests/llama_pretrain_results \
# --data-path=/home/TestData/nlp/megatron_llama/data/rp2_sample_sentencepiece_preproc_text_document \
# --tokenizer-path=/home/TestData/nlp/megatron_llama/tokenizer.model \
# --index-mapping-dir=/tmp/llm_tests/llama_index_mappings \
# 2>&1 | tee /tmp/llm_tests/llama_pretrain_results/run.log
# L2_NeMo_2_llama3_straggler_detection:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_llama3_straggler_detection') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# mkdir -p /tmp/llm_tests/llama_pretrain_results
# export FAULT_TOL_CFG_PATH="/tmp/llm_tests/llama_pretrain_results/sample_job_ft_cfg.yml"; \
# export FAULT_TOL_FINISHED_FLAG_FILE="/tmp/llm_tests/llama_pretrain_results/sample_job_finished_flag"; \
# python tests/collections/llm/test_fault_nvrx.py \
# --devices=2 \
# --check-report=True \
# --experiment-dir=/tmp/llm_tests/llama_pretrain_results \
# --data-path=/home/TestData/nlp/megatron_llama/data/rp2_sample_sentencepiece_preproc_text_document \
# --tokenizer-path=/home/TestData/nlp/megatron_llama/tokenizer.model \
# --index-mapping-dir=/tmp/llm_tests/llama_index_mappings \
# 2>&1 | tee /tmp/llm_tests/llama_pretrain_results/run.log
# L2_NeMo_2_GPT_DDP_Param_Parity_check:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_DDP_Param_Parity_check') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# TORCHDYNAMO_DISABLE=1 python tests/lightning/test_ddp_parity_checker.py \
# --vocab-path=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
# --merges-path=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
# --data-path=/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document
# AFTER_SCRIPT: |
# rm -rf tests/collections/llm/gpt_pretrain_results
# rm -rf tests/collections/llm/gpt_index_mappings
# L2_NeMo_2_SSM_Pretraining:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_SSM_Pretraining') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python tests/collections/llm/gpt/model/megatron_ssm_pretraining.py \
# --devices 1 \
# --max-steps 10 \
# --experiment-dir /tmp/nlp_megatron_mamba_nemo-ux-mamba_cicd_test_pretrain/${{ github.run_id }} \
# --data-path /home/TestData/nlp/megatron_mamba/toy_ssm_dataset/legal_pile_text_document
# L2_NeMo_2_SSM_Finetuning:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_SSM_Finetuning') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python tests/collections/llm/gpt/model/megatron_ssm_finetuning.py \
# --devices 1 \
# --max-steps 10 \
# --experiment-dir /tmp/nlp_megatron_mamba_nemo-ux-mamba_cicd_test_sft/${{ github.run_id }} \
# --model-path /home/TestData/nlp/megatron_mamba/model_optim_rng.pt
# L2_NeMo_2_HF_MODEL_IMPORT:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_HF_MODEL_IMPORT') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt/model/test_model_import.py
# AFTER_SCRIPT: |
# rm -rf ~/.cache/nemo/models
# L2_NeMo_2_jit_callback:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_jit_callback') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/test_nemo_jit_cb.py
# L2_NeMo_2_T5_Pretraining:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_T5_Pretraining') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/megatron_t5_pretraining.py \
# --devices=2 \
# --max-steps=3 \
# --experiment-dir=tests/collections/llm/t5_pretrain_results/${{ github.run_id }} \
# --data-path=/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document \
# --index-mapping-dir=tests/collections/llm/t5_index_mappings/${{ github.run_id }}
# python tests/collections/llm/megatron_t5_pretraining.py \
# --devices=2 \
# --max-steps=6 \
# --experiment-dir=tests/collections/llm/t5_pretrain_results/${{ github.run_id }} \
# --data-path=/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document \
# --index-mapping-dir=tests/collections/llm/t5_index_mappings/${{ github.run_id }}
# AFTER_SCRIPT: |
# rm -rf tests/collections/llm/t5_pretrain_results/${{ github.run_id }}
# rm -rf tests/collections/llm/t5_index_mappings/${{ github.run_id }}
# L2_NeMo_2_T5_Finetuning:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_T5_Finetuning') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/megatron_t5_finetuning.py \
# --devices=2 \
# --max-steps=250 \
# --experiment-dir=tests/collections/llm/t5_finetune_results/${{ github.run_id }} \
# --checkpoint-path=/home/TestData/nlp/megatron_t5/220m/nemo2.0_t5_220m_padding_attnmasktype_150steps
# AFTER_SCRIPT: |
# rm -rf tests/collections/llm/t5_finetune_results/${{ github.run_id }}
# L2_NeMo_2_T5_LoRA:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_T5_LoRA') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/megatron_t5_finetuning.py \
# --devices=2 \
# --max-steps=250 \
# --peft=lora \
# --experiment-dir=tests/collections/llm/t5_peft_results/${{ github.run_id }} \
# --checkpoint-path=/home/TestData/nlp/megatron_t5/220m/nemo2.0_t5_220m_padding_attnmasktype_150steps
# AFTER_SCRIPT: |
# rm -rf tests/collections/llm/t5_peft_results/${{ github.run_id }}
# L2_NeMo_2_NEVA_MOCK_TRAINING:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_NEVA_MOCK_TRAINING') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/vlm/test_neva_train.py \
# --devices=1 \
# --max-steps=5 \
# --experiment-dir=/tmp/nemo2_neva_results/${{ github.run_id }}
# L2_NeMo_2_NEVA_MOCK_PACKED_TRAINING:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_NEVA_MOCK_PACKED_TRAINING') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/vlm/test_neva_train.py \
# --devices=1 \
# --max-steps=5 \
# --experiment-dir=/tmp/nemo2_neva_results/${{ github.run_id }} \
# --use_packed_sequence
# L2_NeMo_2_MLLAMA_MOCK_TRAINING:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_MLLAMA_MOCK_TRAINING') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# TRANSFORMERS_OFFLINE=1 \
# python tests/collections/vlm/test_mllama_train.py \
# --devices=1 \
# --max-steps=5 \
# --experiment-dir=/tmp/nemo2_mllama_results/${{ github.run_id }}
# L2_NeMo_2_Mixtral_Pretraining:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_Mixtral_Pretraining') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python3 tests/collections/llm/megatron_mixtral_pretraining.py \
# --experiment-dir=/tmp/mixtral_pretrain_results \
# --data-path=/home/TestData/nlp/megatron_t5/data/pile_val_small_bert_tokenizer_text_document
# L2_NeMo_2_GPT_SFT_TP1PP1_MBS1:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP1PP1_MBS1') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 3 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft none \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 6 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft none \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1
# L2_NeMo_2_GPT_SFT_TP1PP1_MBS2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP1PP1_MBS2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 3 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft none \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 2
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 6 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft none \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 2
# L2_NeMo_2_GPT_SFT_TP1PP2_MBS2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP1PP2_MBS2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 3 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft none \
# --tp_size 1 \
# --pp_size 2 \
# --mbs 2
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 6 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft none \
# --tp_size 1 \
# --pp_size 2 \
# --mbs 2
# L2_NeMo_2_GPT_SFT_TP2PP1_MBS2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP2PP1_MBS2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 3 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft none \
# --tp_size 2 \
# --pp_size 1 \
# --mbs 2
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 6 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft none \
# --tp_size 2 \
# --pp_size 1 \
# --mbs 2
# L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 3 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft none \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1 --packed
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 6 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft none \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1 --packed
# L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 3 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft lora \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 6 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft lora \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1
# L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 3 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft lora \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 2
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 6 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft lora \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 2
# L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 3 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft lora \
# --tp_size 1 \
# --pp_size 2 \
# --mbs 2
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 6 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft lora \
# --tp_size 1 \
# --pp_size 2 \
# --mbs 2
# L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 3 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft lora \
# --tp_size 2 \
# --pp_size 1 \
# --mbs 2
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 6 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft lora \
# --tp_size 2 \
# --pp_size 1 \
# --mbs 2
# L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 3 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft lora \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1 --packed
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 6 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft lora \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1 --packed
# L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 3 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft dora \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1 --packed
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 6 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft dora \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1 --packed
# L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 3 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft canonical_lora \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1 --packed
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 6 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft canonical_lora \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1 --packed
# L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 3 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft lora \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1 \
# --chat_dataset_path /home/TestData/nemo2_data/chat
# python tests/collections/llm/gpt_finetuning.py \
# --restore_path /home/TestData/nemo2_ckpt/llama_68M_v2 \
# --devices 2 \
# --max_steps 6 \
# --experiment_dir /tmp/nemo2_gpt_finetune/${{ github.run_id }} \
# --peft lora \
# --tp_size 1 \
# --pp_size 1 \
# --mbs 1 \
# --chat_dataset_path /home/TestData/nemo2_data/chat
# L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/lora_mistralai.py \
# --max-steps 3 \
# --ep 1 \
# --mbs 2 \
# --model mixtral
# L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/lora_mistralai.py \
# --max-steps 3 \
# --tp 1 \
# --mbs 1 \
# --model mixtral \
# --dist-opt
# OPTIONAL_L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/lora_mistralai.py \
# --max-steps 3 \
# --tp 2 \
# --mbs 1 \
# --model mixtral \
# --dist-opt
# IS_OPTIONAL: true
# L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/lora_mistralai.py \
# --max-steps 3 \
# --tp 1 \
# --mbs 1 \
# --model mistral \
# --dist-opt
# L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/lora_mistralai.py \
# --max-steps 3 \
# --tp 2 \
# --mbs 1 \
# --model mistral \
# --dist-opt
# L2_NEMO_2_LoRA_MERGE:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NEMO_2_LoRA_MERGE') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/peft/lora_merge.py \
# --lora_checkpoint_path=/home/TestData/nemo2_ckpt/llama_lora_ci_checkpoint_v2/ \
# --output_path=/tmp/nemo2_lora_merge/${{ github.run_id }}
# L2_NEMO_2_LoRA_Export:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NEMO_2_LoRA_Export') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python tests/collections/llm/peft/lora_export.py \
# --lora_checkpoint_path=/home/TestData/nemo2_ckpt/llama_lora_ci_checkpoint_v2/ \
# --output_path=/tmp/nemo2_lora_merge/${{ github.run_id }}
# L2_NEMO_2_LoRA_Inference:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NEMO_2_LoRA_Inference') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python scripts/llm/generate.py \
# --model_path /home/TestData/nemo2_ckpt/llama_lora_ci_checkpoint_v2/ \
# --tp 1 \
# --pp 1 \
# --devices 1 \
# --top_p 0.0 \
# --top_k 1 \
# --num_tokens_to_generate 3
# L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# bash tests/collections/llm/bitexact/mixtral/run.sh
# L2_NeMo_2_PTQ_Llama2_FP8:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_PTQ_Llama2_FP8') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/test_hf_import.py --hf_model /home/TestData/nlp/megatron_llama/llama-ci-hf --output_path /tmp/nemo2_ckpt
# python scripts/llm/ptq.py -nc /tmp/nemo2_ckpt -algo fp8 -out /tmp/nemo2_ptq_engine
# AFTER_SCRIPT: |
# rm -rf /tmp/nemo2_ckpt
# rm -rf /tmp/nemo2_ptq_engine
# L2_NeMo_2_Export_In_Framework:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_Export_In_Framework') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/collections/llm/test_hf_import.py \
# --hf_model /home/TestData/nlp/megatron_llama/llama-ci-hf \
# --output_path /tmp/nemo2_ckpt
# python tests/setup/data/create_sample_lambada.py \
# --output_file /tmp/lambada.json
# python tests/export/nemo_export.py \
# --model_name test \
# --model_type llama \
# --checkpoint_dir /tmp/nemo2_ckpt \
# --min_tps 1 \
# --in_framework True \
# --test_deployment True \
# --run_accuracy True \
# --test_data_path /tmp/lambada.json \
# --accuracy_threshold 0.0 \
# --debug
# AFTER_SCRIPT: |
# rm -rf /tmp/nemo2_ckpt /tmp/lambada.json
# L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure-gpus-1
# SCRIPT: |
# python tests/collections/vlm/test_llava_next_train.py \
# --devices=1 \
# --max-steps=5 \
# --experiment-dir=/tmp/nemo2_llava_next_results/${{ github.run_id }}
# AFTER_SCRIPT: |
# rm -rf /tmp/nemo2_llava_next_results
# L2_NeMo_2_VLLM_EXPORT:
# needs: [cicd-test-container-setup]
# uses: ./.github/workflows/_test_template.yml
# if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'L2_NeMo_2_VLLM_EXPORT') || needs.cicd-test-container-setup.outputs.all == 'true'
# with:
# RUNNER: self-hosted-azure
# SCRIPT: |
# python tests/setup/models/create_hf_model.py \
# --model_name_or_path /home/TestData/nlp/megatron_llama/llama-ci-hf \
# --output_dir /tmp/llama_head64 \
# --config_updates "{\"hidden_size\": 512, \"num_attention_heads\": 4, \"numx_hidden_layers\": 2, \"num_key_value_heads\": 4, \"intermediate_size\": 1024, \"head_dim\": 128, \"num_hidden_layers\": 2, \"torch_dtype\": \"float16\" }"
# python tests/collections/llm/test_hf_import.py --hf_model /tmp/llama_head64 --output_path /tmp/nemo2_ckpt
# /opt/venv/bin/python tests/export/nemo_export.py \
# --min_tps 1 \
# --max_tps 1 \
# --use_vllm True \
# --model_type llama \
# --max_output_len 128 \
# --test_deployment True \
# --model_name nemo2_ckpt \
# --model_dir /tmp/vllm_from_nemo2 \
# --checkpoint_dir /tmp/nemo2_ckpt
# AFTER_SCRIPT: |
# rm -rf /tmp/llama_head64
# rm -rf /tmp/nemo2_ckpt
# rm -rf /tmp/vllm_from_nemo2
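# # Final gate: waits on every CICD test job, then reports the aggregated result to the PR and, on failure, to Slack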
# Nemo_CICD_Test:
# needs:
# - pre-flight
# - cicd-test-container-setup
# - L0_Unit_Tests_GPU_ASR
# - L0_Unit_Tests_GPU_Audio
# - L0_Unit_Tests_GPU_Common
# - L0_Unit_Tests_GPU_LLM
# - L0_Unit_Tests_GPU_Multimodal
# - L0_Unit_Tests_GPU_NLP
# - L0_Unit_Tests_GPU_TTS
# #- OPTIONAL_L0_Unit_Tests_GPU_Core
# - L0_Unit_Tests_GPU_Hydra
# - L0_Unit_Tests_GPU_Lightning
# - L0_Unit_Tests_GPU_Others
# - L0_Unit_Tests_CPU_ASR
# - L0_Unit_Tests_CPU_Audio
# - L0_Unit_Tests_CPU_Common
# - L0_Unit_Tests_CPU_LLM
# - L0_Unit_Tests_CPU_Multimodal
# - L0_Unit_Tests_CPU_NLP
# - L0_Unit_Tests_CPU_TTS
# - L0_Unit_Tests_CPU_Core
# - L0_Unit_Tests_CPU_Hydra
# - L0_Unit_Tests_CPU_Lightning
# - L0_Unit_Tests_CPU_Others
# - L2_Community_LLM_Checkpoints_tests_Bert
# - L2_Community_LLM_Checkpoints_tests_Mamba2
# - L2_Community_LLM_Checkpoints_tests_Llama
# - L2_Community_LLM_Checkpoints_tests_StarCoder
# - L2_Community_LLM_Checkpoints_tests_Falcon
# - L2_Community_vita_Checkpoints_tests_Llama3
# #- OPTIONAL_L2_Community_LLM_Checkpoints_tests_Baichuan2
# - ASR_dev_run_Speech_to_Text
# - ASR_dev_run_Speech_to_Text_WPE_-_CitriNet
# - ASR_dev_run_Speech_Pre-training_-_CitriNet
# - ASR_dev_run_Speech_To_Text_Finetuning
# - ASR_dev_run_Speech_To_Text_HF_Finetuning
# - ASR_dev_run_Speech_to_Text_WPE_-_Conformer
# - ASR_dev_run-part_two_Speech_to_Text_WPE_-_Squeezeformer
# - L2_Speech_to_Text_EMA
# - L2_Speaker_dev_run_Speaker_Recognition
# - L2_Speaker_dev_run_Speaker_Diarization
# - L2_Speaker_dev_run_EndtoEnd_Speaker_Diarization_Sortformer
# - L2_Speaker_dev_run_EndtoEnd_Diarizer_Inference
# - L2_Speaker_dev_run_Speech_to_Label
# - L2_Speaker_dev_run_Speaker_Diarization_with_ASR_Inference
# - L2_Speaker_dev_run_Clustering_Diarizer_Inference
# - L2_Speaker_dev_run_Neural_Diarizer_Inference
# - L2_Speaker_dev_run_Multispeaker_ASR_Data_Simulation
# - L2_ASR_Multi-dataloader_dev_run_Speech_to_Text_multi-dataloader
# - L2_ASR_Multi-dataloader_dev_run_Speech_to_Label_multi-dataloader
# - L2_ASR_Adapters_Linear_Adapters
# - L2_ASR_Adapters_RelPos_MHA_Adapters
# - L2_Speech_Transcription_Speech_to_Text_Transcribe
# - L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav
# - L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3
# - L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference
# - L2_G2P_Models_HeteronymClassificationModel_training_evaluation_and_inference
# - L2_Pretraining_BERT_pretraining_from_Text
# - L2_Pretraining_BERT_from_Preprocessed
# - L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Post-LN
# - L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Pre-LN
# - L2_NMT_Attention_is_All_You_Need_Training_NMT_Multi-Validation
# - L2_NMT_Attention_is_All_You_Need_Inference
# - L2_NMT_Attention_is_All_You_Need_Finetuning
# - L2_NMT_Tarred_Dataset_Creation_Auto_Tarred_Dataset_Creation
# - L2_NMT_Tarred_Dataset_Creation_Script_Tarred_Dataset_Creation
# - L2_Megatron_NMT_Training_TP2
# - L2_Megatron_Bert_Pretraining_and_Resume_Training_with_Pipeline_Parallelism
# - L2_Megatron_Core_Bert_Pretraining_and_Resume_Training
# - L2_RAG_Pipeline_Indexing
# - L2_RAG_Pipeline_Generating
# - L2_Megatron_GPT_Pretraining_and_Resume_Training_TP2
# - L2_Megatron_GPT_Skip_Train
# - L2_Megatron_LM_To_NeMo_Conversion
# - L2_Megatron_GPT_with_Rope_Pretraining_and_Resume_Training_TP2
# - L2_Megatron_GPT_with_ResetLR_Pretraining_and_Resume_Training_TP2
# - L2_Megatron_GPT_with_Drop_Optimizer_States_TP2
# - L2_Megatron_GPT_with_ALiBi_Pretraining_and_Resume_Training_TP2
# - L2_Megatron_GPT_with_KERPLE_Pretraining_and_Resume_Training_TP2
# #- OPTIONAL_L2_Megatron_GPT_Pretraining_and_Resume_Training_PP2
# - L2_Megatron_GPT_Auto_Configurator_TP1_PP1_MBS124
# - L2_Megatron_GPT_Finetuning_PP2
# - L2_Megatron_GPT_Finetuning_StarCoder_PP1
# - L2_Megatron_GPT_Embedding
# - L2_Megatron_GPT_PEFT_Lora_PP2_O2
# - L2_Megatron_GPT_PEFT_Lora_TP2_O1
# - L2_Megatron_GPT_PEFT_Lora_TP2SP1
# - L2_Megatron_GPT_Eval
# - L2_Megatron_GPT_Eval_PP2
# - L2_Megatron_GPT_SFT_Eval_inference_seq_len_greaterThan_training_seq_len
# - L2_Megatron_Change_Partitions_Reduce_TP_Num_Partitions_-2_to_1-_and_PP_Num_Partitions_-1_to_2
# - L2_Megatron_Change_Partitions_Increase_TP_Num_Partitions_-2_to_4-_and_PP_Num_Partitions_-1_to_2
# - L2_Megatron_Core_T5_Pretraining_and_Resume_Training_TP2
# - L2_Megatron_T5_with_ALiBi_Pretraining_and_Resume_Training_TP2
# - L2_Megatron_T5_with_KERPLE_Pretraining_and_Resume_Training_TP2
# #- OPTIONAL_L2_Megatron_T5_Pretraining_and_Resume_Training_PP2
# - L2_Megatron_T5_w_Mixture_of_Expert_Pretraining
# - L2_Megatron_UL2_Pretraining_and_Resume_Training_TP2
# - L2_Megatron_Core_T5_Eval
# - L2_Megatron_Core_T5_PEFT_Lora_TP2
# - L2_Megatron_Mock_Data_Generation_MockGPTDataset
# - L2_Megatron_Mock_Data_Generation_MockT5Dataset
# - L2_TTS_Fast_dev_runs_1_Tacotron_2
# - L2_TTS_Fast_dev_runs_1_WaveGlow
# - L2_TTS_Fast_dev_runs_1_FastPitch
# #- OPTIONAL_L2_TTS_Fast_dev_runs_1_RADTTS
# - L2_TTS_Fast_dev_runs_1_Hifigan
# - Speech_Checkpoints_tests
# - L2_Stable_Diffusion_Training
# - L2_NeMo_2_NEVA_MOCK_TRAINING
# - L2_NeMo_2_NEVA_MOCK_PACKED_TRAINING
# - L2_NeMo_2_MLLAMA_MOCK_TRAINING
# - L2_NeMo_2_GPT_Pretraining_no_transformer_engine
# - L2_NeMo_2_GPT_DDP_Param_Parity_check
# - L2_NeMo_2_HF_MODEL_IMPORT
# - L2_NeMo_2_llama3_pretraining_recipe
# - L2_NeMo_2_llama3_fault_tolerance_plugin
# - L2_NeMo_2_llama3_straggler_detection
# - L2_HF_Transformer_PEFT
# - L2_HF_Transformer_PEFT_nemorun
# - L2_HF_Transformer_PEFT_2gpu
# - L2_HF_Transformer_PEFT_2gpu_nemorun
# - L2_HF_Transformer_SFT
# - L2_HF_Transformer_SFT_nemorun
# - L2_HF_Transformer_SFT_2gpu
# - L2_VLM_HF_Transformer_PEFT
# - L2_VLM_HF_Transformer_PEFT_FSDP
# - L2_VLM_HF_Transformer_PEFT_4bit
# - L2_VLM_HF_Transformer_SFT_FSDP2
# - L2_HF_Transformer_SFT_2gpu_nemorun
# - L2_HF_Transformer_SFT_TE_Acceleration
# - L2_HF_Transformer_PT
# - L2_HF_Transformer_PT_nemorun
# - L2_HF_Transformer_PT_2gpu
# - L2_HF_Transformer_PT_2gpu_nemorun
# - L2_HF_Transformer_PT_TE_Acceleration
# - L2_NeMo_2_SSM_Pretraining
# - L2_NeMo_2_SSM_Finetuning
# - L2_NeMo_2_T5_Pretraining
# - L2_NeMo_2_T5_Finetuning
# - L2_NeMo_2_T5_LoRA
# - L2_NeMo_2_GPT_SFT_TP1PP1_MBS1
# - L2_NeMo_2_GPT_SFT_TP1PP1_MBS2
# - L2_NeMo_2_GPT_SFT_TP1PP2_MBS2
# - L2_NeMo_2_GPT_SFT_TP2PP1_MBS2
# - L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED
# - L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1
# - L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2
# - L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2
# - L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2
# - L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat
# - L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED
# - L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED
# - L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED
# - L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2
# - L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1
# #- OPTIONAL_L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1
# - L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1
# - L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1
# - L2_NEMO_2_LoRA_MERGE
# - L2_NEMO_2_LoRA_Export
# - L2_NEMO_2_LoRA_Inference
# - L2_NeMo_2_Mixtral_Pretraining
# - L2_PTQ_Llama2_FP8
# - L2_Community_LLM_Checkpoints_tests_Llama3
# - L2_Distill_Llama2
# - L2_Prune_Width_Llama2
# - L2_Prune_Depth_Llama2
# - L2_Speech_to_Text_AED
# - L2_Speech_Estimate_Duration_Bins
# - L2_Speech_Batch_Size_OOMptimizer
# - L2_Speech_Batch_Size_OOMptimizer_Canary
# - L2_Speech_Transcription_Canary_Transcribe_Full_Manifest
# - L2_Speech_Transcription_Canary_Transcribe_With_Prompt
# - L2_Speech_Transcription_Canary_Transcribe_Audio_Dir
# - L2_Megatron_GPT_Reranker
# - L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact
# - L2_NeMo_2_PTQ_Llama2_FP8
# - L2_NeMo_2_Export_In_Framework
# - L2_NeMo_2_jit_callback
# - L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING
# - L2_HF_Transformer_SFT_FSDP2_2gpu
# - L2_HF_Transformer_SFT_2gpu_nemorun_fsdp2
# - L2_NeMo_2_VLLM_EXPORT
# if: always()
# runs-on: ubuntu-latest
# steps:
# - name: Evaluate conclusion
# if: ${{ always() }}
# id: pipeline-conclusion
# run: |
# # Slack notifications are sent only on test failures (not on cancellations):
# FAILED=${{ contains(needs.*.outputs.conclusion, 'failure') }}
# echo "FAILED=$FAILED" >> $GITHUB_OUTPUT
# # Mark as successful only if no job failed, was cancelled, or was skipped:
# SUCCESS=${{ !contains(needs.*.outputs.conclusion, 'failure') && !contains(needs.*.result, 'cancelled') && !contains(needs.*.result, 'skipped') }}
# echo "SUCCESS=$SUCCESS" >> $GITHUB_OUTPUT
# # This job should depend on all tests so that merging is blocked or unblocked based on every test passing
# - name: Pipeline successful, set exit code to 0
# if: ${{ always() && steps.pipeline-conclusion.outputs.SUCCESS == 'true' }}
# run: exit 0
# - name: Pipeline successful, add PR comment
# if: ${{ always() && steps.pipeline-conclusion.outputs.SUCCESS == 'true' && github.event_name == 'pull_request' && env.SLACK_WEBHOOK != '' }}
# uses: peter-evans/create-or-update-comment@v4
# env:
# SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
# REPOSITORY: ${{ github.repository }}
# RUN_ID: ${{ github.run_id }}
# with:
# issue-number: ${{ github.event.number }}
# body: |
# [🤖]: Hi @${{ github.event.pull_request.user.login }} 👋,
# We wanted to let you know that a [CICD pipeline](https://github.com/${{ env.REPOSITORY }}/actions/runs/${{ env.RUN_ID }}) for this PR just finished successfully.
# So it might be time to merge this PR or get some approvals.
# I'm just a bot, so I'll leave it to you what to do next.
# //cc @pablo-garay @ko3n1g
# - name: "Pipeline not successful and not cancelled: Send Slack alert & create step summary"
# if: ${{ always() && steps.pipeline-conclusion.outputs.FAILED == 'true' && env.SLACK_WEBHOOK != '' }}
# env:
# SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
# SLACK_WEBHOOK_ADMIN: <!subteam^${{ secrets.SLACK_WEBHOOK_ADMIN }}>
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# GITHUB_ACTOR: ${{ github.actor }}
# BRANCH: ${{ github.head_ref || github.ref_name }}
# REPOSITORY: ${{ github.repository }}
# RUN_ID: ${{ github.run_id }}
# PR_NUMBER: ${{ github.event.number }}
# SERVER_URL: ${{ github.server_url }}
# run: |
# set -x
# PR_INFO=$(curl -L \
# -H "Accept: application/vnd.github+json" \
# -H "Authorization: Bearer $GITHUB_TOKEN" \
# -H "X-GitHub-Api-Version: 2022-11-28" \
# https://api.github.com/repos/$REPOSITORY/pulls/$PR_NUMBER
# )
# PR_URL=$(echo -E $PR_INFO | jq '.html_url' | tr -d '"')
# PR_TITLE=$(echo -E $PR_INFO | jq '.title' | tr -d '"')
# PIPELINE_URL=$SERVER_URL/$REPOSITORY/actions/runs/$RUN_ID
# BASE_MESSAGE='
# {
# "blocks": [
# {
# "type": "section",
# "text": {
# "type": "mrkdwn",
# "text": "🚨 *CI/CD failure at <'$PIPELINE_URL'|NeMo CI>*."
# }
# }
# ]
# }
# '
# # Since this workflow contains more than 100 jobs, we need to iterate over job pages
# JOBS='[]'
# PAGE=1
# while : ; do
# JOBS_URL="https://api.github.com/repos/$REPOSITORY/actions/runs/$RUN_ID/jobs?page=$PAGE&per_page=100"
# RESPONSE=$(curl -s -H "Authorization: token $GITHUB_TOKEN" $JOBS_URL | jq '.jobs')
# JOBS=$(echo -e "$JOBS\n$RESPONSE" | jq -cs 'add')
# if [[ $(echo $RESPONSE | jq 'length') -lt 100 ]]; then
# break
# else
# PAGE=$(( PAGE + 1))
# fi
# done
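# # Build one Slack block per failed job and list it in the step summary, attaching its captured logs when available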
# SUMMARY="[]"
# echo "Failed jobs: " | tee -a $GITHUB_STEP_SUMMARY
# while IFS= read -r JOB; do
# JOB_NAME="$(echo $JOB | jq '.key' | tr -d '"') / main"
# JOB_ID=$(echo $JOBS | jq --arg job_name "$JOB_NAME" '.[] | select(.name == $job_name) | .id')
# JOB_URL="https://github.com/$REPOSITORY/actions/runs/$RUN_ID/job/$JOB_ID"
# echo "* [$JOB_NAME]($JOB_URL)" | tee -a $GITHUB_STEP_SUMMARY
# LOGS=$(echo $JOB | yq '(.value.outputs.log | @base64d)' | tr -d '"')
# LOGS=$([[ $(echo $LOGS | wc -c) -gt 0 ]] && echo -E "\`\`\`\n$LOGS\n\`\`\`" || echo "")
# LOGS=$([[ $(echo $JOB | yq '.value.outputs.potential_infra_failure') == "true" ]] && echo -E "$LOGS\n\ncc: $SLACK_WEBHOOK_ADMIN" || echo -E "$LOGS")
# SUMMARY=$(echo "$SUMMARY" | jq \
# --arg pr "<$PR_URL|$PR_TITLE>" \
# --arg job "<$JOB_URL|$JOB_NAME>" \
# --arg logs "$(echo -e "$LOGS")" \
# --arg author "<https://github.com/$GITHUB_ACTOR|$GITHUB_ACTOR>" \
# --arg branch "<https://github.com/$REPOSITORY/tree/$BRANCH|$BRANCH>"\
# '. += [
# {
# "type": "section",
# "text": {
# "type": "mrkdwn",
# "text": (
# "PR: " + $pr
# + "\nJob: " + $job
# + "\nAuthor: " + $author
# + "\nBranch: " + $branch
# + "\nLogs:" + $logs
# )
# }
# }
# ]')
# done <<<$(echo '${{ toJSON(needs) }}' | jq -c 'to_entries | .[] | select(.value.outputs.conclusion == "failure")')
# MESSAGE=$(echo $BASE_MESSAGE | jq -c --argjson summary "$SUMMARY" '.blocks += $summary')
# curl -X POST -H "Content-type: application/json" --data "$MESSAGE" $SLACK_WEBHOOK
# - name: "Pipeline not successful, set exit code to 1"
# if: ${{ always() && steps.pipeline-conclusion.outputs.SUCCESS == 'false' }}
# run: exit 1