Skip to content

Testing deploy_inframework and query_inframework scripts #11175

Testing deploy_inframework and query_inframework scripts

Testing deploy_inframework and query_inframework scripts #11175

Workflow file for this run

# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Workflow identity, triggers and run de-duplication.
name: CICD NeMo
on:
  # Nightly run at 00:00 UTC.
  schedule:
    - cron: '0 0 * * *'
  # PRs only react to label events; pre-flight then checks which label it was.
  pull_request:
    types:
      - labeled
    branches:
      - main
      - 'r**'
      - weekly-bump
  push:
    branches:
      - main
  # Manual runs may narrow the suite via `test_to_run`.
  workflow_dispatch:
    inputs:
      test_to_run:
        description: Comma-separated list of tests to run. Use "all" to run the full test suite.
        type: string
        default: all
        required: false
# One live run per workflow / PR-or-ref / label / event; a newer run cancels the older one.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }}
jobs:
pre-flight:
  # Decides which test jobs run for the triggering event and collects the
  # container build arguments from requirements/manifest.json.
  runs-on: ubuntu-latest
  outputs:
    # JSON array of selected job names; an empty selection renders as '[]'.
    test_to_run: ${{ steps.test_to_run.outputs.main }}
    # Newline-separated KEY=VALUE pairs produced by the manifest step.
    build_args: ${{ steps.manifest.outputs.BUILD_ARGS }}
  env:
    TESTS_TO_RUN: ${{ inputs.test_to_run }}
    EVENT_NAME: ${{ github.event_name }}
    # Rendered as the string "true" only when the event carries the `Run CICD` label.
    HAS_LABEL: ${{ github.event.label.name == 'Run CICD' }}
  steps:
    - name: Checkout branch
      uses: actions/checkout@v4
    - name: Select tests to run
      id: test_to_run
      run: |
        # Prints every job key of this workflow as one comma-separated line.
        all_jobs() {
          yq '.jobs | [to_entries[] | .key] | join(",")' .github/workflows/cicd-main.yml
        }

        case "$EVENT_NAME" in
          workflow_dispatch)
            # Manual dispatch: expand `all`, otherwise keep the provided list as is.
            if [[ "$TESTS_TO_RUN" == "all" ]]; then
              TESTS_TO_RUN=$(all_jobs)
            fi
            ;;
          pull_request)
            # Correctly labeled PR runs everything; any other label runs nothing.
            if [[ "$HAS_LABEL" == "true" ]]; then
              TESTS_TO_RUN=$(all_jobs)
            else
              TESTS_TO_RUN=""
            fi
            ;;
          push | schedule)
            # Push events run all tests so that coverage can be generated on
            # branch `main`. The workflow also declares a cron trigger, so
            # `schedule` must be accepted here too or every scheduled run
            # would abort in the fallback branch below.
            TESTS_TO_RUN=$(all_jobs)
            ;;
          *)
            echo "Unsupported event_name $EVENT_NAME provided".
            exit 1
            ;;
        esac

        # An empty list becomes `[]`, which downstream jobs use as the skip signal.
        parsed_string=$(echo "$TESTS_TO_RUN" | jq -c --raw-input 'split(",")')
        echo "main=${parsed_string}" | tee -a "$GITHUB_OUTPUT"
    - name: Parse manifest.json
      id: manifest
      run: |
        # Build the KEY=VALUE list; each value is read from requirements/manifest.json.
        BUILD_ARGS=$(cat << EOF
        BASE_IMAGE=$(jq -r '."ngc-pytorch"' requirements/manifest.json)
        TRTLLM_REPO=$(jq -r '."vcs-dependencies"."trt-llm".repo' requirements/manifest.json)
        TRTLLM_TAG=$(jq -r '."vcs-dependencies"."trt-llm".ref' requirements/manifest.json)
        MLM_REPO=$(jq -r '."vcs-dependencies"."megatron-lm".repo' requirements/manifest.json)
        MLM_TAG=$(jq -r '."vcs-dependencies"."megatron-lm".ref' requirements/manifest.json)
        TE_REPO=$(jq -r '."vcs-dependencies".transformer_engine.repo' requirements/manifest.json)
        TE_TAG=$(jq -r '."vcs-dependencies".transformer_engine.ref' requirements/manifest.json)
        APEX_REPO=$(jq -r '."vcs-dependencies".apex.repo' requirements/manifest.json)
        APEX_TAG=$(jq -r '."vcs-dependencies".apex.ref' requirements/manifest.json)
        EOF
        )
        # Multi-line step output: name<<DELIMITER, value lines, DELIMITER.
        echo "BUILD_ARGS<<EOF" >> "$GITHUB_OUTPUT"
        echo "$BUILD_ARGS" >> "$GITHUB_OUTPUT"
        echo "EOF" >> "$GITHUB_OUTPUT"
code-linting:
  # Delegates to the repository's linting workflow unless pre-flight selected nothing.
  needs:
    - pre-flight
  if: ${{ needs.pre-flight.outputs.test_to_run != '[]' }}
  uses: ./.github/workflows/code-linting.yml
cicd-test-container-build:
# Calls a reusable workflow from NVIDIA/NeMo-FW-CI-templates, passing the image
# name, Dockerfile, label and build args below. Skipped when pre-flight
# selected no tests (test_to_run renders as '[]').
if: ${{ needs.pre-flight.outputs.test_to_run != '[]' }}
# NOTE(review): "[email protected]" looks like e-mail-obfuscation residue that
# replaced the "<workflow-file>@<ref>" tail of this reference; as written it is
# not a usable workflow reference. Restore the real file name and ref — TODO confirm.
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/[email protected]
needs: [pre-flight, code-linting]
with:
image-name: nemo_container
dockerfile: Dockerfile.ci
image-label: nemo-core
# Fixed args first, then the KEY=VALUE lines emitted by pre-flight's manifest step.
build-args: |
IMAGE_LABEL=nemo-core
NEMO_TAG=${{ github.sha }}
NEMO_REPO=https://github.com/NVIDIA/NeMo
${{ needs.pre-flight.outputs.BUILD_ARGS }}
prune-filter-timerange: 24h
cicd-import-tests:
  # Runs version, rank and import checks inside the nemo_container image
  # tagged with this run id. Skipped when pre-flight selected no tests.
  if: ${{ needs.pre-flight.outputs.test_to_run != '[]' }}
  needs: [cicd-test-container-build, pre-flight]
  runs-on: self-hosted-azure-gpus-1
  steps:
    - name: Run some checks
      # NOTE: the commands passed to `bash -c` are newline-separated without
      # `set -e`, so only the exit status of the last command decides whether
      # this step fails. A broken command earlier in the list is therefore
      # skipped silently, so every command name here must be valid.
      run: |
        docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --env PYTHONUNBUFFERED=1 nemoci.azurecr.io/nemo_container:${{ github.run_id }} bash -c '\
          # PyTorch Lightning version
          python -c "import lightning.pytorch; print(lightning.pytorch.__version__)"
          # PyTorch Lightning DDP Checks
          CUDA_VISIBLE_DEVICES="0,1" python "tests/core_ptl/check_for_ranks.py"
          # Basic Import Checks
          python tests/core_ptl/check_imports.py --domain asr
          python tests/core_ptl/check_imports.py --domain nlp
          python tests/core_ptl/check_imports.py --domain tts '
# L0: GPU unit tests
# Every job below calls the shared _test_template.yml workflow and runs only
# when its own name is present in pre-flight's `test_to_run` list.
L0_Unit_Tests_GPU_ASR:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_ASR') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    # TODO: remove this hack
    SCRIPT: L0_Unit_Tests_GPU_ASR
    RUNNER: self-hosted-azure-gpus-1
    TIMEOUT: 20
    IS_UNIT_TEST: true
L0_Unit_Tests_GPU_Audio:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Audio') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_GPU_Audio
    RUNNER: self-hosted-azure-gpus-1
    TIMEOUT: 20
    IS_UNIT_TEST: true
L0_Unit_Tests_GPU_Common:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Common') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_GPU_Common
    RUNNER: self-hosted-azure-gpus-1
    IS_UNIT_TEST: true
L0_Unit_Tests_GPU_LLM:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_LLM') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_GPU_LLM
    RUNNER: self-hosted-azure-gpus-1
    IS_UNIT_TEST: true
L0_Unit_Tests_GPU_VLM:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_VLM') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_GPU_VLM
    RUNNER: self-hosted-azure-gpus-1
    IS_UNIT_TEST: true
L0_Unit_Tests_GPU_Multimodal:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Multimodal') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_GPU_Multimodal
    RUNNER: self-hosted-azure-gpus-1
    IS_UNIT_TEST: true
L0_Unit_Tests_GPU_TTS:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_TTS') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_GPU_TTS
    RUNNER: self-hosted-azure-gpus-1
    IS_UNIT_TEST: true
L0_Unit_Tests_GPU_Core:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Core') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_GPU_Core
    RUNNER: self-hosted-azure-gpus-1
    TIMEOUT: 20
    IS_UNIT_TEST: true
L0_Unit_Tests_GPU_Hydra:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Hydra') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_GPU_Hydra
    RUNNER: self-hosted-azure-gpus-1
    IS_UNIT_TEST: true
L0_Unit_Tests_GPU_Lightning:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Lightning') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_GPU_Lightning
    RUNNER: self-hosted-azure
    IS_UNIT_TEST: true
L0_Unit_Tests_GPU_Others:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_GPU_Others') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_GPU_Others
    RUNNER: self-hosted-azure-gpus-1
    IS_UNIT_TEST: true
# L0: CPU unit tests
# Same template call as the GPU unit tests, dispatched to the CPU runner;
# each job is gated on its own name appearing in pre-flight's selection.
L0_Unit_Tests_CPU_ASR:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_ASR') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_CPU_ASR
    RUNNER: self-hosted-azure-cpu
    TIMEOUT: 20
    IS_UNIT_TEST: true
L0_Unit_Tests_CPU_Audio:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Audio') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_CPU_Audio
    RUNNER: self-hosted-azure-cpu
    IS_UNIT_TEST: true
L0_Unit_Tests_CPU_Common:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Common') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_CPU_Common
    RUNNER: self-hosted-azure-cpu
    TIMEOUT: 20
    IS_UNIT_TEST: true
L0_Unit_Tests_CPU_LLM:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_LLM') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_CPU_LLM
    RUNNER: self-hosted-azure-cpu
    IS_UNIT_TEST: true
L0_Unit_Tests_CPU_VLM:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_VLM') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_CPU_VLM
    RUNNER: self-hosted-azure-cpu
    IS_UNIT_TEST: true
L0_Unit_Tests_CPU_Multimodal:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Multimodal') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_CPU_Multimodal
    RUNNER: self-hosted-azure-cpu
    IS_UNIT_TEST: true
L0_Unit_Tests_CPU_TTS:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_TTS') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_CPU_TTS
    RUNNER: self-hosted-azure-cpu
    IS_UNIT_TEST: true
L0_Unit_Tests_CPU_Core:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Core') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_CPU_Core
    RUNNER: self-hosted-azure-cpu
    TIMEOUT: 20
    IS_UNIT_TEST: true
L0_Unit_Tests_CPU_Hydra:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Hydra') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_CPU_Hydra
    RUNNER: self-hosted-azure-cpu
    IS_UNIT_TEST: true
L0_Unit_Tests_CPU_Lightning:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Lightning') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_CPU_Lightning
    RUNNER: self-hosted-azure-cpu
    IS_UNIT_TEST: true
L0_Unit_Tests_CPU_Others:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Unit_Tests_CPU_Others') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Unit_Tests_CPU_Others
    RUNNER: self-hosted-azure-cpu
    IS_UNIT_TEST: true
L0_Setup_Test_Data_And_Models:
  # Template call gated on this job's name being selected by pre-flight.
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L0_Setup_Test_Data_And_Models') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L0_Setup_Test_Data_And_Models
    RUNNER: self-hosted-azure
# L2: Community multimodal checkpoint tests (vita / Llama3 job)
L2_Community_vita_Checkpoints_tests_Llama3:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Community_vita_Checkpoints_tests_Llama3') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Community_vita_Checkpoints_tests_Llama3
    RUNNER: self-hosted-azure-gpus-1
# L2: ASR dev run
# Each job runs only when pre-flight's selection contains its name; the two
# `Optional_` jobs additionally pass IS_OPTIONAL to the template.
ASR_dev_run_Speech_to_Text:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_to_Text') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: ASR_dev_run_Speech_to_Text
    RUNNER: self-hosted-azure-gpus-1
ASR_dev_run_Speech_to_Text_WPE_CitriNet:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_to_Text_WPE_CitriNet') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: ASR_dev_run_Speech_to_Text_WPE_CitriNet
    RUNNER: self-hosted-azure-gpus-1
ASR_dev_run_Speech_Pre-training_-_CitriNet:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_Pre-training_-_CitriNet') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: ASR_dev_run_Speech_Pre-training_-_CitriNet
    RUNNER: self-hosted-azure-gpus-1
Optional_ASR_dev_run_Speech_To_Text_Finetuning:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'Optional_ASR_dev_run_Speech_To_Text_Finetuning') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: Optional_ASR_dev_run_Speech_To_Text_Finetuning
    RUNNER: self-hosted-azure-gpus-1
    IS_OPTIONAL: true
Optional_ASR_dev_run_Speech_To_Text_HF_Finetuning:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'Optional_ASR_dev_run_Speech_To_Text_HF_Finetuning') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: Optional_ASR_dev_run_Speech_To_Text_HF_Finetuning
    RUNNER: self-hosted-azure-gpus-1
    IS_OPTIONAL: true
ASR_dev_run_Speech_to_Text_WPE_-_Conformer:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run_Speech_to_Text_WPE_-_Conformer') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: ASR_dev_run_Speech_to_Text_WPE_-_Conformer
    RUNNER: self-hosted-azure-gpus-1
# L2: ASR dev run - part two
ASR_dev_run-part_two_Speech_to_Text_WPE_-_Squeezeformer:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'ASR_dev_run-part_two_Speech_to_Text_WPE_-_Squeezeformer') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: ASR_dev_run-part_two_Speech_to_Text_WPE_-_Squeezeformer
    RUNNER: self-hosted-azure-gpus-1
L2_Speech_to_Text_EMA:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_to_Text_EMA') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speech_to_Text_EMA
    RUNNER: self-hosted-azure
L2_Speech_to_Text_AED:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_to_Text_AED') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speech_to_Text_AED
    RUNNER: self-hosted-azure-gpus-1
# L2: Speaker dev run
# All jobs share the test template and are gated on their own name being in
# pre-flight's `test_to_run` list; only the runner label differs between them.
L2_Speaker_dev_run_Speaker_Recognition:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Speaker_Recognition') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speaker_dev_run_Speaker_Recognition
    RUNNER: self-hosted-azure-gpus-1
L2_Speaker_dev_run_Speaker_Diarization:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Speaker_Diarization') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speaker_dev_run_Speaker_Diarization
    RUNNER: self-hosted-azure-gpus-1
L2_Speaker_dev_run_EndtoEnd_Speaker_Diarization_Sortformer:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_EndtoEnd_Speaker_Diarization_Sortformer') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speaker_dev_run_EndtoEnd_Speaker_Diarization_Sortformer
    RUNNER: self-hosted-azure-gpus-1
L2_Speaker_dev_run_EndtoEnd_Diarizer_Inference:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_EndtoEnd_Diarizer_Inference') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speaker_dev_run_EndtoEnd_Diarizer_Inference
    RUNNER: self-hosted-azure
L2_Speaker_dev_run_Speech_to_Label:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Speech_to_Label') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speaker_dev_run_Speech_to_Label
    RUNNER: self-hosted-azure-gpus-1
L2_Speaker_dev_run_Speaker_Diarization_with_ASR_Inference:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Speaker_Diarization_with_ASR_Inference') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speaker_dev_run_Speaker_Diarization_with_ASR_Inference
    RUNNER: self-hosted-azure
L2_Speaker_dev_run_Clustering_Diarizer_Inference:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Clustering_Diarizer_Inference') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speaker_dev_run_Clustering_Diarizer_Inference
    RUNNER: self-hosted-azure
L2_Speaker_dev_run_Neural_Diarizer_Inference:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Neural_Diarizer_Inference') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speaker_dev_run_Neural_Diarizer_Inference
    RUNNER: self-hosted-azure
L2_Speaker_dev_run_Multispeaker_ASR_Data_Simulation:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speaker_dev_run_Multispeaker_ASR_Data_Simulation') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speaker_dev_run_Multispeaker_ASR_Data_Simulation
    RUNNER: self-hosted-azure
# L2: ASR Multi-dataloader dev run
L2_ASR_Multi-dataloader_dev_run_Speech_to_Text_multi-dataloader:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_ASR_Multi-dataloader_dev_run_Speech_to_Text_multi-dataloader') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_ASR_Multi-dataloader_dev_run_Speech_to_Text_multi-dataloader
    RUNNER: self-hosted-azure-gpus-1
L2_ASR_Multi-dataloader_dev_run_Speech_to_Label_multi-dataloader:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_ASR_Multi-dataloader_dev_run_Speech_to_Label_multi-dataloader') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_ASR_Multi-dataloader_dev_run_Speech_to_Label_multi-dataloader
    RUNNER: self-hosted-azure-gpus-1
# L2: ASR Adapters
L2_ASR_Adapters_Linear_Adapters:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_ASR_Adapters_Linear_Adapters') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_ASR_Adapters_Linear_Adapters
    RUNNER: self-hosted-azure-gpus-1
L2_ASR_Adapters_RelPos_MHA_Adapters:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_ASR_Adapters_RelPos_MHA_Adapters') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_ASR_Adapters_RelPos_MHA_Adapters
    RUNNER: self-hosted-azure-gpus-1
# L2: OOMptimizer (duration-bin estimation)
L2_Speech_Estimate_Duration_Bins:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Estimate_Duration_Bins') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speech_Estimate_Duration_Bins
    RUNNER: self-hosted-azure
# L2: OOMptimizer (batch size)
L2_Speech_Batch_Size_OOMptimizer:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Batch_Size_OOMptimizer') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speech_Batch_Size_OOMptimizer
    RUNNER: self-hosted-azure
# L2: OOMptimizer Canary (has a different batch schema)
Optional_L2_Speech_Batch_Size_OOMptimizer_Canary:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'Optional_L2_Speech_Batch_Size_OOMptimizer_Canary') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: Optional_L2_Speech_Batch_Size_OOMptimizer_Canary
    RUNNER: self-hosted-azure
    IS_OPTIONAL: true
# L2: Speech Transcription
L2_Speech_Transcription_Speech_to_Text_Transcribe:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Transcription_Speech_to_Text_Transcribe') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speech_Transcription_Speech_to_Text_Transcribe
    RUNNER: self-hosted-azure
# L2: Speech Transcription (Canary)
L2_Speech_Transcription_Canary_Transcribe_Full_Manifest:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Transcription_Canary_Transcribe_Full_Manifest') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speech_Transcription_Canary_Transcribe_Full_Manifest
    RUNNER: self-hosted-azure
L2_Speech_Transcription_Canary_Transcribe_With_Prompt:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Transcription_Canary_Transcribe_With_Prompt') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Speech_Transcription_Canary_Transcribe_With_Prompt
    RUNNER: self-hosted-azure
    # Passed to the template as the cleanup command for this job's output files.
    AFTER_SCRIPT: |
      rm -rf preds.json transcribe.log
# Template call gated on this job's name being present in pre-flight's
# `test_to_run` list.
# NOTE(review): indentation is not visible in this view, so it cannot be told
# whether the trailing `IS_OPTIONAL: true` is a `with` input or a second line
# of the AFTER_SCRIPT block scalar — verify against the real file.
L2_Speech_Transcription_Canary_Transcribe_Audio_Dir:
needs: [pre-flight, cicd-test-container-build]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Speech_Transcription_Canary_Transcribe_Audio_Dir')
with:
RUNNER: self-hosted-azure
SCRIPT: L2_Speech_Transcription_Canary_Transcribe_Audio_Dir
AFTER_SCRIPT: |
rm -rf preds.json
IS_OPTIONAL: true
# L2: Longform without TimeStamps from Audio Dir
L2_Longform_Speech_Transcription_Canary_Chunked_Infer_from_Audio_Dir:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Longform_Speech_Transcription_Canary_Chunked_Infer_from_Audio_Dir') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Longform_Speech_Transcription_Canary_Chunked_Infer_from_Audio_Dir
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf preds.json
# L2: Longform with TimeStamps from Audio Dir
L2_Longform_Speech_Transcription_with_TimeStamps_Canary_Chunked_Infer_from_Audio_Dir:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Longform_Speech_Transcription_with_TimeStamps_Canary_Chunked_Infer_from_Audio_Dir') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Longform_Speech_Transcription_with_TimeStamps_Canary_Chunked_Infer_from_Audio_Dir
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf preds.json
# L2: Longform with TimeStamps from manifest
L2_Longform_Speech_Transcription_with_TimeStamps_Canary_Chunked_Infer_from_Manifest:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Longform_Speech_Transcription_with_TimeStamps_Canary_Chunked_Infer_from_Manifest') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Longform_Speech_Transcription_with_TimeStamps_Canary_Chunked_Infer_from_Manifest
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf preds.json
# L2: Segmentation Tool
L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav
    RUNNER: self-hosted-azure
L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3
    RUNNER: self-hosted-azure
# L2: G2P Models
L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference
    RUNNER: self-hosted-azure
# TODO: pleasefixme @redoctopus
# - name: ByT5G2P training, evaluation and inference
# run: |
# cd examples/tts/g2p && \
# TIME=`date +"%Y-%m-%d-%T"` && OUTPUT_DIR_T5=output_byt5_${TIME} && \
# python g2p_train_and_evaluate.py \
# train_manifest=/home/TestData/g2p/g2p.json \
# validation_manifest=/home/TestData/g2p/g2p.json \
# model.test_ds.manifest_filepath=/home/TestData/g2p/g2p.json \
# trainer.max_epochs=1 \
# model.max_source_len=64 \
# trainer.devices=1 \
# do_training=True \
# do_testing=True \
# exp_manager.exp_dir=${OUTPUT_DIR_T5} \
# +exp_manager.use_datetime_version=False\
# +exp_manager.version=test && \
# python g2p_inference.py \
# pretrained_model=${OUTPUT_DIR_T5}/T5G2P/test/checkpoints/T5G2P.nemo \
# manifest_filepath=/home/TestData/g2p/g2p.json \
# phoneme_field=text
# }
# }
# - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
# if: "failure()"
L2_G2P_Models_HeteronymClassificationModel_training_evaluation_and_inference:
  # Template call gated on this job's name being selected by pre-flight.
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_G2P_Models_HeteronymClassificationModel_training_evaluation_and_inference') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_G2P_Models_HeteronymClassificationModel_training_evaluation_and_inference
    RUNNER: self-hosted-azure
# TODO: remove +model.optim.capturable=True when Pytorch fix: https://github.com/pytorch/pytorch/pull/81858
# is in the release container
# L2: NMT Attention is All You Need Training
L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Post-LN:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Post-LN') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Post-LN
    RUNNER: self-hosted-azure-gpus-1
    AFTER_SCRIPT: |
      rm -rf examples/nlp/machine_translation/nmt_results
L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Pre-LN:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Pre-LN') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Pre-LN
    RUNNER: self-hosted-azure-gpus-1
L2_NMT_Attention_is_All_You_Need_Training_NMT_Multi-Validation:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Training_NMT_Multi-Validation') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NMT_Attention_is_All_You_Need_Training_NMT_Multi-Validation
    RUNNER: self-hosted-azure-gpus-1
# L2: NMT Attention is All You Need Inference
L2_NMT_Attention_is_All_You_Need_Inference:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Inference') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NMT_Attention_is_All_You_Need_Inference
    RUNNER: self-hosted-azure
# L2: NMT Attention is All You Need Finetuning
L2_NMT_Attention_is_All_You_Need_Finetuning:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Attention_is_All_You_Need_Finetuning') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NMT_Attention_is_All_You_Need_Finetuning
    RUNNER: self-hosted-azure-gpus-1
    AFTER_SCRIPT: |
      rm -rf examples/nlp/machine_translation/nmt_finetune
# L2: NMT Tarred Dataset Creation
L2_NMT_Tarred_Dataset_Creation_Auto_Tarred_Dataset_Creation:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Tarred_Dataset_Creation_Auto_Tarred_Dataset_Creation') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NMT_Tarred_Dataset_Creation_Auto_Tarred_Dataset_Creation
    RUNNER: self-hosted-azure-gpus-1
L2_NMT_Tarred_Dataset_Creation_Script_Tarred_Dataset_Creation:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NMT_Tarred_Dataset_Creation_Script_Tarred_Dataset_Creation') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NMT_Tarred_Dataset_Creation_Script_Tarred_Dataset_Creation
    RUNNER: self-hosted-azure
# Megatron NMT training job (TP2 variant)
L2_Megatron_NMT_Training_TP2:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_Megatron_NMT_Training_TP2') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Megatron_NMT_Training_TP2
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf examples/nlp/machine_translation/megatron_nmt_results
L2_VLM_HF_Transformer_PEFT:
  # The four VLM HF jobs share the template call, are gated on their own name
  # in pre-flight's selection, and remove `nemo_experiments` afterwards.
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_VLM_HF_Transformer_PEFT') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_VLM_HF_Transformer_PEFT
    RUNNER: self-hosted-azure-gpus-1
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
L2_VLM_HF_Transformer_PEFT_FSDP2:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_VLM_HF_Transformer_PEFT_FSDP2') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_VLM_HF_Transformer_PEFT_FSDP2
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
L2_VLM_HF_Transformer_PEFT_4bit:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_VLM_HF_Transformer_PEFT_4bit') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_VLM_HF_Transformer_PEFT_4bit
    RUNNER: self-hosted-azure-gpus-1
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
L2_VLM_HF_Transformer_SFT_FSDP2:
  needs: [pre-flight, cicd-test-container-build]
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_VLM_HF_Transformer_SFT_FSDP2') }}
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_VLM_HF_Transformer_SFT_FSDP2
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
L2_HF_Transformer_PEFT_notebook:
needs: [pre-flight, cicd-test-container-build]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PEFT_notebook')
with:
RUNNER: self-hosted-azure
SCRIPT: L2_HF_Transformer_PEFT_notebook
AFTER_SCRIPT: |
rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_PEFT:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_PEFT')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_PEFT
    RUNNER: self-hosted-azure-gpus-1
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_PEFT_nemorun:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_PEFT_nemorun')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_PEFT_nemorun
    RUNNER: self-hosted-azure-gpus-1
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_PEFT_2gpu:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_PEFT_2gpu')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_PEFT_2gpu
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Selected solely through pre-flight's test_to_run list. pre-flight declares
# only the `test_to_run` and `build_args` outputs, so the condition does not
# consult any `outputs.all` value (it would always be empty).
L2_HF_Transformer_PEFT_2gpu_FSDP2_liger:
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PEFT_2gpu_FSDP2_liger')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_HF_Transformer_PEFT_2gpu_FSDP2_liger
# Selected solely through pre-flight's test_to_run list. pre-flight declares
# only the `test_to_run` and `build_args` outputs, so the condition does not
# consult any `outputs.all` value (it would always be empty).
L2_HF_Transformer_PEFT_2gpu_FSDP2_fp8:
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PEFT_2gpu_FSDP2_fp8')
  with:
    RUNNER: azure-gpu-vm-runner1-h100
    SCRIPT: L2_HF_Transformer_PEFT_2gpu_FSDP2_fp8
    # Passed through to the template as an optional-job flag.
    IS_OPTIONAL: true
# Selected solely through pre-flight's test_to_run list. pre-flight declares
# only the `test_to_run` and `build_args` outputs, so the condition does not
# consult any `outputs.all` value (it would always be empty).
L2_HF_Transformer_PEFT_2gpu_FSDP2:
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_PEFT_2gpu_FSDP2')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_HF_Transformer_PEFT_2gpu_FSDP2
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_PEFT_2gpu_nemorun:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_PEFT_2gpu_nemorun')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_PEFT_2gpu_nemorun
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_SFT_2gpu:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_SFT_2gpu')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_SFT_2gpu
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_SFT_2gpu_FSDP2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_SFT_2gpu_FSDP2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_SFT_2gpu_FSDP2
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Selected solely through pre-flight's test_to_run list. pre-flight declares
# only the `test_to_run` and `build_args` outputs, so the condition does not
# consult any `outputs.all` value (it would always be empty).
L2_HF_Transformer_SFT_2gpu_FSDP2_fp8:
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SFT_2gpu_FSDP2_fp8')
  with:
    RUNNER: azure-gpu-vm-runner1-h100
    SCRIPT: L2_HF_Transformer_SFT_2gpu_FSDP2_fp8
    # Passed through to the template as an optional-job flag.
    IS_OPTIONAL: true
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_SFT_2gpu_nemorun:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_SFT_2gpu_nemorun')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_SFT_2gpu_nemorun
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_SFT_2gpu_nemorun_fsdp2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_SFT_2gpu_nemorun_fsdp2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_SFT_2gpu_nemorun_fsdp2
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_SFT_FSDP2_2gpu:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_SFT_FSDP2_2gpu')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_SFT_FSDP2_2gpu
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_PT_2gpu:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_PT_2gpu')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_PT_2gpu
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_PT_2gpu_nemorun:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_PT_2gpu_nemorun')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_PT_2gpu_nemorun
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_PT:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_PT')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_PT
    RUNNER: self-hosted-azure-gpus-1
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_PT_nemorun:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_PT_nemorun')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_PT_nemorun
    RUNNER: self-hosted-azure-gpus-1
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_SFT_notebook:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_SFT_notebook')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_SFT_notebook
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_SFT:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_SFT')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_SFT
    RUNNER: self-hosted-azure-gpus-1
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_SFT_nemorun:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_SFT_nemorun')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_SFT_nemorun
    RUNNER: self-hosted-azure-gpus-1
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
# IS_OPTIONAL is forwarded to the template alongside the cleanup script.
L2_HF_Transformer_SFT_TE_Acceleration:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_SFT_TE_Acceleration')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_SFT_TE_Acceleration
    RUNNER: self-hosted-azure-gpus-1
    IS_OPTIONAL: true
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_HF_Transformer_PT_TE_Acceleration:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_HF_Transformer_PT_TE_Acceleration')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_HF_Transformer_PT_TE_Acceleration
    RUNNER: self-hosted-azure-gpus-1
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# L2: SpeechLM tests
# Selected solely through pre-flight's test_to_run list. pre-flight declares
# only the `test_to_run` and `build_args` outputs, so the condition does not
# consult any `outputs.all` value (it would always be empty).
L2_HF_Transformer_SpeechLM_SFT_2gpu:
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_HF_Transformer_SpeechLM_SFT_2gpu')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_HF_Transformer_SpeechLM_SFT_2gpu
    AFTER_SCRIPT: |
      rm -rf nemo_experiments
# L2: TTS Fast dev runs 1
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_TTS_Fast_dev_runs_1_Tacotron_2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_TTS_Fast_dev_runs_1_Tacotron_2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_TTS_Fast_dev_runs_1_Tacotron_2
    RUNNER: self-hosted-azure-gpus-1
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_TTS_Fast_dev_runs_1_WaveGlow:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_TTS_Fast_dev_runs_1_WaveGlow')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_TTS_Fast_dev_runs_1_WaveGlow
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_TTS_Fast_dev_runs_1_FastPitch:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_TTS_Fast_dev_runs_1_FastPitch')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_TTS_Fast_dev_runs_1_FastPitch
    RUNNER: self-hosted-azure
# OPTIONAL_L2_TTS_Fast_dev_runs_1_RADTTS:
# needs: [pre-flight, cicd-test-container-build]
# runs-on: self-hosted-azure
# timeout-minutes: 10
# container:
# image: nemoci.azurecr.io/nemo_container:${{ github.run_id }}
# options:
# # --user 0:128
# --device=/dev/nvidia0
# --gpus all
# --shm-size=8g
# --env TRANSFORMERS_OFFLINE=0
# --env HYDRA_FULL_ERROR=1
# --volume /mnt/datadrive/TestData:/home/TestData
# steps:
# - name: Checkout repository
# uses: actions/checkout@v4
# - run: |
# python examples/tts/radtts.py \
# train_dataset=/home/TestData/an4_dataset/an4_train.json \
# validation_datasets=/home/TestData/an4_dataset/an4_val.json \
# sup_data_path=/home/TestData/an4_dataset/radtts_beta_priors \
# trainer.devices="[0]" \
# +trainer.limit_train_batches=1 \
# +trainer.limit_val_batches=1 \
# trainer.max_epochs=1 \
# trainer.strategy=auto \
# model.pitch_mean=212.35873413085938 \
# model.pitch_std=68.52806091308594 \
# model.train_ds.dataloader_params.batch_size=4 \
# model.train_ds.dataloader_params.num_workers=0 \
# model.validation_ds.dataloader_params.batch_size=4 \
# model.validation_ds.dataloader_params.num_workers=0 \
# export_dir=/home/TestData/radtts_test \
# model.optim.lr=0.0001 \
# model.modelConfig.decoder_use_partial_padding=True \
# ~trainer.check_val_every_n_epoch \
# ~model.text_normalizer \
# ~model.text_normalizer_call_kwargs
# #- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
# # if: "failure()"
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_TTS_Fast_dev_runs_1_Hifigan:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_TTS_Fast_dev_runs_1_Hifigan')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_TTS_Fast_dev_runs_1_Hifigan
    RUNNER: self-hosted-azure
# L2: NeRF
# L2_NeRF_DreamFusion:
# needs: [pre-flight, cicd-test-container-build]
# runs-on: self-hosted-azure
# container:
# image: nemoci.azurecr.io/nemo_container:${{ github.run_id }}
# options:
# # --user 0:128
# --device=/dev/nvidia0
# --gpus all
# --shm-size=8g
# --env TRANSFORMERS_OFFLINE=0
# --env HYDRA_FULL_ERROR=1
# --volume /mnt/datadrive/TestData:/home/TestData
# steps:
# - name: Checkout repository
# uses: actions/checkout@v4
# - run: |
# python examples/multimodal/text_to_image/nerf/main.py \
# trainer.num_nodes=1 \
# trainer.devices="[0]" \
# trainer.max_steps=1000 \
# model.prompt="a DSLR photo of a delicious hamburger" \
# exp_manager.exp_dir=examples/multimodal/text_to_image/nerf/dreamfusion_results
#
# rm -rf examples/multimodal/text_to_image/nerf/dreamfusion_results
# - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
# if: "failure()"
# Opt-in job: runs only when pre-flight's test_to_run list names it.
# Passes an explicit TIMEOUT of 20 to the template and deletes the
# evaluation transcript file via AFTER_SCRIPT.
Speech_Checkpoints_tests:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'Speech_Checkpoints_tests')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: Speech_Checkpoints_tests
    RUNNER: self-hosted-azure-gpus-1
    TIMEOUT: 20
    AFTER_SCRIPT: |
      rm -f examples/asr/evaluation_transcripts.json
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_Stable_Diffusion_Training:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_Stable_Diffusion_Training')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_Stable_Diffusion_Training
    RUNNER: self-hosted-azure-gpus-1
    AFTER_SCRIPT: |
      rm -rf examples/multimodal/text_to_image/sd_train_results
# Opt-in job: runs only when pre-flight's test_to_run list names it.
# AFTER_SCRIPT removes the GPT pretrain results and index-mapping dirs.
L2_NeMo_2_GPT_Pretraining_no_transformer_engine:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_Pretraining_no_transformer_engine')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_Pretraining_no_transformer_engine
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf tests/collections/llm/gpt_pretrain_results
      rm -rf tests/collections/llm/gpt_index_mappings
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_llama3_pretraining_recipe:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_llama3_pretraining_recipe')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_llama3_pretraining_recipe
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_llama3_fault_tolerance_plugin:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_llama3_fault_tolerance_plugin')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_llama3_fault_tolerance_plugin
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_llama3_straggler_detection:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_llama3_straggler_detection')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_llama3_straggler_detection
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
# AFTER_SCRIPT removes the GPT pretrain results and index-mapping dirs.
L2_NeMo_2_GPT_DDP_Param_Parity_check:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_DDP_Param_Parity_check')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_DDP_Param_Parity_check
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf tests/collections/llm/gpt_pretrain_results
      rm -rf tests/collections/llm/gpt_index_mappings
# Opt-in job: runs only when pre-flight's test_to_run list names it.
# Cleanup targets a results directory keyed by the workflow run id.
L2_NeMo_2_Hyena_Conversion_from_HF:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Hyena_Conversion_from_HF')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Hyena_Conversion_from_HF
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf tests/collections/llm/hyena_conversion_results/${{ github.run_id }}
# Opt-in job: runs only when pre-flight's test_to_run list names it.
# Cleanup targets a results directory keyed by the workflow run id.
L2_NeMo_2_Hyena_DDP_Pretraining_Test:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Hyena_DDP_Pretraining_Test')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Hyena_DDP_Pretraining_Test
    # Assume runner has 2 GPUs
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf tests/collections/llm/hyena_pretrain_results/${{ github.run_id }}
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_SSM_Pretraining:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_SSM_Pretraining')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_SSM_Pretraining
    RUNNER: self-hosted-azure-gpus-1
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_SSM_Finetuning:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_SSM_Finetuning')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_SSM_Finetuning
    RUNNER: self-hosted-azure-gpus-1
# Opt-in job: runs only when pre-flight's test_to_run list names it.
# AFTER_SCRIPT deletes the NeMo model cache under the home directory.
L2_NeMo_2_HF_MODEL_IMPORT:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_HF_MODEL_IMPORT')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_HF_MODEL_IMPORT
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf ~/.cache/nemo/models
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_jit_callback:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_jit_callback')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_jit_callback
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
# Cleanup targets result/index directories keyed by the workflow run id.
L2_NeMo_2_T5_Pretraining:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_T5_Pretraining')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_T5_Pretraining
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf tests/collections/llm/t5_pretrain_results/${{ github.run_id }}
      rm -rf tests/collections/llm/t5_index_mappings/${{ github.run_id }}
# Opt-in job: runs only when pre-flight's test_to_run list names it.
# Cleanup targets a results directory keyed by the workflow run id.
L2_NeMo_2_T5_Finetuning:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_T5_Finetuning')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_T5_Finetuning
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf tests/collections/llm/t5_finetune_results/${{ github.run_id }}
# Opt-in job: runs only when pre-flight's test_to_run list names it.
# Cleanup targets a results directory keyed by the workflow run id.
L2_NeMo_2_T5_LoRA:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_T5_LoRA')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_T5_LoRA
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf tests/collections/llm/t5_peft_results/${{ github.run_id }}
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_BERT_Pretraining_Megatron:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_BERT_Pretraining_Megatron')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_BERT_Pretraining_Megatron
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_BERT_Pretraining_HuggingFace:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_BERT_Pretraining_HuggingFace')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_BERT_Pretraining_HuggingFace
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_NEVA_MOCK_PRETRAIN_TP2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_NEVA_MOCK_PRETRAIN_TP2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_NEVA_MOCK_PRETRAIN_TP2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_NEVA_MOCK_PRETRAIN_PP2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_NEVA_MOCK_PRETRAIN_PP2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_NEVA_MOCK_PRETRAIN_PP2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_NEVA_MOCK_PRETRAIN_CP2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_NEVA_MOCK_PRETRAIN_CP2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_NEVA_MOCK_PRETRAIN_CP2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_NEVA_MOCK_FINETUNE_TP2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_NEVA_MOCK_FINETUNE_TP2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_NEVA_MOCK_FINETUNE_TP2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_NEVA_MOCK_FINETUNE_PP2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_NEVA_MOCK_FINETUNE_PP2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_NEVA_MOCK_FINETUNE_PP2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_NEVA_MOCK_FINETUNE_CP2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_NEVA_MOCK_FINETUNE_CP2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_NEVA_MOCK_FINETUNE_CP2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_NEVA_LOAD_GENERATE:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_NEVA_LOAD_GENERATE')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_NEVA_LOAD_GENERATE
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NEMO_2_MLLAMA_Inference:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NEMO_2_MLLAMA_Inference')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NEMO_2_MLLAMA_Inference
    RUNNER: self-hosted-azure-gpus-1
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_MLLAMA_MOCK_FINETUNE_TP2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_MLLAMA_MOCK_FINETUNE_TP2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_MLLAMA_MOCK_FINETUNE_TP2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_Mixtral_Pretraining:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Mixtral_Pretraining')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Mixtral_Pretraining
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_GPT_SFT_TP1PP1_MBS1:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_SFT_TP1PP1_MBS1')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_SFT_TP1PP1_MBS1
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_GPT_SFT_TP1PP1_MBS2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_SFT_TP1PP1_MBS2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_SFT_TP1PP1_MBS2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_GPT_SFT_TP1PP2_MBS2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_SFT_TP1PP2_MBS2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_SFT_TP1PP2_MBS2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_GPT_SFT_TP2PP1_MBS2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_SFT_TP2PP1_MBS2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_SFT_TP2PP1_MBS2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2_exclude:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2_exclude')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2_exclude
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1_exclude:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1_exclude')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1_exclude
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NEMO_2_LoRA_MERGE:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NEMO_2_LoRA_MERGE')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NEMO_2_LoRA_MERGE
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NEMO_2_LoRA_Export:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NEMO_2_LoRA_Export')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NEMO_2_LoRA_Export
    RUNNER: self-hosted-azure-gpus-1
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NEMO_2_LoRA_Inference:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NEMO_2_LoRA_Inference')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NEMO_2_LoRA_Inference
    RUNNER: self-hosted-azure-gpus-1
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_Automodel_PTQ_trtllm:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Automodel_PTQ_trtllm')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Automodel_PTQ_trtllm
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_Automodel_PTQ_hf:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Automodel_PTQ_hf')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Automodel_PTQ_hf
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_PTQ_Llama2_FP8_trtllm:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_PTQ_Llama2_FP8_trtllm')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_PTQ_Llama2_FP8_trtllm
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_PTQ_Llama2_FP8_nemo:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_PTQ_Llama2_FP8_nemo')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_PTQ_Llama2_FP8_nemo
    RUNNER: self-hosted-azure
# Selected solely through pre-flight's test_to_run list. pre-flight declares
# only the `test_to_run` and `build_args` outputs, so the condition does not
# consult any `outputs.all` value (it would always be empty).
L2_NeMo_2_Distill_Llama3_TP1PP2:
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Distill_Llama3_TP1PP2')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Distill_Llama3_TP1PP2
# Selected solely through pre-flight's test_to_run list. pre-flight declares
# only the `test_to_run` and `build_args` outputs, so the condition does not
# consult any `outputs.all` value (it would always be empty).
L2_NeMo_2_Prune_Llama_TP1PP2:
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Prune_Llama_TP1PP2')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Prune_Llama_TP1PP2
# Opt-in job: runs only when pre-flight's test_to_run list names it.
# AFTER_SCRIPT removes the checkpoint dir and lambada.json under /tmp.
L2_NeMo_2_Export_In_Framework:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Export_In_Framework')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Export_In_Framework
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf /tmp/nemo2_ckpt /tmp/lambada.json
# Opt-in job: runs only when pre-flight's test_to_run list names it.
# AFTER_SCRIPT removes the checkpoint directory under /tmp.
L2_NeMo_2_Export_Deploy_Query_In_Framework:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_Export_Deploy_Query_In_Framework')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Export_Deploy_Query_In_Framework
    RUNNER: self-hosted-azure
    AFTER_SCRIPT: |
      rm -rf /tmp/nemo2_ckpt
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_LLAVA_NEXT_HF_CONVERSION:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_LLAVA_NEXT_HF_CONVERSION')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_LLAVA_NEXT_HF_CONVERSION
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_LLAVA_NEXT_ENERGON_TRAIN:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_LLAVA_NEXT_ENERGON_TRAIN')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_LLAVA_NEXT_ENERGON_TRAIN
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_LLAVA_NEXT_ENERGON_PACKED_TRAIN:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_LLAVA_NEXT_ENERGON_PACKED_TRAIN')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_LLAVA_NEXT_ENERGON_PACKED_TRAIN
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_CLIP_PRETRAIN:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_CLIP_PRETRAIN')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_CLIP_PRETRAIN
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_CLIP_INFER:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_CLIP_INFER')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_CLIP_INFER
    RUNNER: self-hosted-azure
# Opt-in job: runs only when pre-flight's test_to_run list names it.
L2_NeMo_2_VLLM_EXPORT:
  needs:
    - pre-flight
    - cicd-test-container-build
  if: >-
    contains(fromJSON(needs.pre-flight.outputs.test_to_run),
    'L2_NeMo_2_VLLM_EXPORT')
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_VLLM_EXPORT
    RUNNER: self-hosted-azure
L2_NeMo_2_EVAL:
  # Calls the reusable test template with SCRIPT set to this job's name,
  # on the self-hosted-azure-gpus-1 runner.
  # Only runs when pre-flight's test_to_run list contains this job.
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_EVAL') }}
  needs:
    - pre-flight
    - cicd-test-container-build
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_EVAL
    RUNNER: self-hosted-azure-gpus-1
L2_NeMo_2_Auto_Configurator_llama_TP1_PP1_MBS124:
  # Calls the reusable test template with SCRIPT set to this job's name,
  # on the self-hosted-azure-gpus-1 runner.
  # Only runs when pre-flight's test_to_run list contains this job.
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Auto_Configurator_llama_TP1_PP1_MBS124') }}
  needs:
    - pre-flight
    - cicd-test-container-build
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Auto_Configurator_llama_TP1_PP1_MBS124
    RUNNER: self-hosted-azure-gpus-1
    # Cleanup command handed to the template: deletes the llama log directory.
    AFTER_SCRIPT: |
      rm -rf examples/llm/auto_configurator/auto_conf_logs_llama
L2_NeMo_2_Auto_Configurator_bert_TP1_PP1_MBS124:
  # Calls the reusable test template with SCRIPT set to this job's name,
  # on the self-hosted-azure-gpus-1 runner.
  # Only runs when pre-flight's test_to_run list contains this job.
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Auto_Configurator_bert_TP1_PP1_MBS124') }}
  needs:
    - pre-flight
    - cicd-test-container-build
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Auto_Configurator_bert_TP1_PP1_MBS124
    RUNNER: self-hosted-azure-gpus-1
    # Cleanup command handed to the template: deletes the bert log directory.
    AFTER_SCRIPT: |
      rm -rf examples/llm/auto_configurator/auto_conf_logs_bert
L2_NeMo_2_Auto_Configurator_t5_TP1_PP1_MBS124:
  # Calls the reusable test template with SCRIPT set to this job's name,
  # on the self-hosted-azure-gpus-1 runner.
  # Only runs when pre-flight's test_to_run list contains this job.
  if: ${{ contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Auto_Configurator_t5_TP1_PP1_MBS124') }}
  needs:
    - pre-flight
    - cicd-test-container-build
  uses: ./.github/workflows/_test_template.yml
  with:
    SCRIPT: L2_NeMo_2_Auto_Configurator_t5_TP1_PP1_MBS124
    RUNNER: self-hosted-azure-gpus-1
    # Cleanup command handed to the template: deletes the t5 log directory.
    AFTER_SCRIPT: |
      rm -rf examples/llm/auto_configurator/auto_conf_logs_t5
L2_SpeechLM_LoRA_TP1PP1_MBS2:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_SpeechLM_LoRA_TP1PP1_MBS2')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_SpeechLM_LoRA_TP1PP1_MBS2
L2_NeMo_2_Conversion_Test_Baichuan2:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_Baichuan2')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_Baichuan2
L2_NeMo_2_Conversion_Test_ChatGLM:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_ChatGLM')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_ChatGLM
L2_NeMo_2_Conversion_Test_DeepSeek:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_DeepSeek')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_DeepSeek
L2_NeMo_2_Conversion_Test_Gemma:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_Gemma')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_Gemma
L2_NeMo_2_Conversion_Test_Gemma2:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_Gemma2')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_Gemma2
L2_NeMo_2_Conversion_Test_Mistral:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_Mistral')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_Mistral
L2_NeMo_2_Conversion_Test_Llama:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_Llama')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_Llama
L2_NeMo_2_Conversion_Test_Llama_Embedding:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated on pre-flight's test_to_run list so that a targeted run which does
  # not name this job skips it instead of always executing it.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_Llama_Embedding')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_Llama_Embedding
L2_NeMo_2_Conversion_Test_Nemotron:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_Nemotron')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_Nemotron
L2_NeMo_2_Conversion_Test_Phi3Mini:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_Phi3Mini')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_Phi3Mini
L2_NeMo_2_Conversion_Test_Qwen2:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_Qwen2')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_Qwen2
L2_NeMo_2_Conversion_Test_Starcoder:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_Starcoder')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_Starcoder
L2_NeMo_2_Conversion_Test_Starcoder2:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_Starcoder2')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_Starcoder2
L2_NeMo_2_Conversion_Test_BERT:
  # Calls the reusable test template with SCRIPT set to this job's name.
  # Gated only on pre-flight's test_to_run list; pre-flight declares no `all`
  # output, so there is nothing else to fall back on.
  needs: [pre-flight, cicd-test-container-build]
  uses: ./.github/workflows/_test_template.yml
  if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'L2_NeMo_2_Conversion_Test_BERT')
  with:
    RUNNER: self-hosted-azure
    SCRIPT: L2_NeMo_2_Conversion_Test_BERT
Nemo_CICD_Test:
  # Gate job: waits for every job listed under `needs`, then its steps
  # evaluate their results/outputs to decide the overall pipeline outcome.
  needs:
    - pre-flight
    - cicd-import-tests
    - L0_Unit_Tests_GPU_ASR
    - L0_Unit_Tests_GPU_Audio
    - L0_Unit_Tests_GPU_Common
    - L0_Unit_Tests_GPU_LLM
    - L0_Unit_Tests_GPU_VLM
    - L0_Unit_Tests_GPU_Multimodal
    - L0_Unit_Tests_GPU_TTS
    - L0_Unit_Tests_GPU_Core
    - L0_Unit_Tests_GPU_Hydra
    - L0_Unit_Tests_GPU_Lightning
    - L0_Unit_Tests_GPU_Others
    - L0_Unit_Tests_CPU_ASR
    - L0_Unit_Tests_CPU_Audio
    - L0_Unit_Tests_CPU_Common
    - L0_Unit_Tests_CPU_LLM
    - L0_Unit_Tests_CPU_VLM
    - L0_Unit_Tests_CPU_Multimodal
    - L0_Unit_Tests_CPU_TTS
    - L0_Unit_Tests_CPU_Core
    - L0_Unit_Tests_CPU_Hydra
    - L0_Unit_Tests_CPU_Lightning
    - L0_Unit_Tests_CPU_Others
    # Nemo2 Conversion Tests
    - L2_NeMo_2_Conversion_Test_Baichuan2
    - L2_NeMo_2_Conversion_Test_ChatGLM
    - L2_NeMo_2_Conversion_Test_DeepSeek
    - L2_NeMo_2_Conversion_Test_Gemma
    - L2_NeMo_2_Conversion_Test_Gemma2
    - L2_NeMo_2_Conversion_Test_Llama
    - L2_NeMo_2_Conversion_Test_Llama_Embedding
    - L2_NeMo_2_Conversion_Test_Mistral
    - L2_NeMo_2_Conversion_Test_Nemotron
    - L2_NeMo_2_Conversion_Test_Phi3Mini
    - L2_NeMo_2_Conversion_Test_Qwen2
    - L2_NeMo_2_Conversion_Test_Starcoder
    - L2_NeMo_2_Conversion_Test_Starcoder2
    - L2_NeMo_2_Conversion_Test_BERT
    - ASR_dev_run_Speech_to_Text
    - ASR_dev_run_Speech_to_Text_WPE_CitriNet
    - ASR_dev_run_Speech_Pre-training_-_CitriNet
    - Optional_ASR_dev_run_Speech_To_Text_Finetuning
    - Optional_ASR_dev_run_Speech_To_Text_HF_Finetuning
    - ASR_dev_run_Speech_to_Text_WPE_-_Conformer
    - ASR_dev_run-part_two_Speech_to_Text_WPE_-_Squeezeformer
    - L2_Speech_to_Text_EMA
    - L2_Speaker_dev_run_Speaker_Recognition
    - L2_Speaker_dev_run_Speaker_Diarization
    - L2_Speaker_dev_run_EndtoEnd_Speaker_Diarization_Sortformer
    - L2_Speaker_dev_run_EndtoEnd_Diarizer_Inference
    - L2_Speaker_dev_run_Speech_to_Label
    - L2_Speaker_dev_run_Speaker_Diarization_with_ASR_Inference
    - L2_Speaker_dev_run_Clustering_Diarizer_Inference
    - L2_Speaker_dev_run_Neural_Diarizer_Inference
    - L2_Speaker_dev_run_Multispeaker_ASR_Data_Simulation
    - L2_ASR_Multi-dataloader_dev_run_Speech_to_Text_multi-dataloader
    - L2_ASR_Multi-dataloader_dev_run_Speech_to_Label_multi-dataloader
    - L2_ASR_Adapters_Linear_Adapters
    - L2_ASR_Adapters_RelPos_MHA_Adapters
    - L2_Speech_Transcription_Speech_to_Text_Transcribe
    - L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Eng_CitriNet_with_wav
    - L2_Segmentation_Tool_Parallel_ctc_segmentation_test_L2_Ru_QN_with_mp3
    - L2_G2P_Models_G2P_Conformer_training_evaluation_and_inference
    - L2_G2P_Models_HeteronymClassificationModel_training_evaluation_and_inference
    - L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Post-LN
    - L2_NMT_Attention_is_All_You_Need_Training_NMT_Training_Pre-LN
    - L2_NMT_Attention_is_All_You_Need_Training_NMT_Multi-Validation
    - L2_NMT_Attention_is_All_You_Need_Inference
    - L2_NMT_Attention_is_All_You_Need_Finetuning
    - L2_NMT_Tarred_Dataset_Creation_Auto_Tarred_Dataset_Creation
    - L2_NMT_Tarred_Dataset_Creation_Script_Tarred_Dataset_Creation
    - L2_Megatron_NMT_Training_TP2
    - L2_TTS_Fast_dev_runs_1_Tacotron_2
    - L2_TTS_Fast_dev_runs_1_WaveGlow
    - L2_TTS_Fast_dev_runs_1_FastPitch
    #- OPTIONAL_L2_TTS_Fast_dev_runs_1_RADTTS
    - L2_TTS_Fast_dev_runs_1_Hifigan
    - Speech_Checkpoints_tests
    - L2_Stable_Diffusion_Training
    - L2_NeMo_2_NEVA_MOCK_PRETRAIN_TP2
    - L2_NeMo_2_NEVA_MOCK_PRETRAIN_PP2
    - L2_NeMo_2_NEVA_MOCK_PRETRAIN_CP2
    - L2_NeMo_2_NEVA_MOCK_FINETUNE_TP2
    - L2_NeMo_2_NEVA_MOCK_FINETUNE_PP2
    - L2_NeMo_2_NEVA_MOCK_FINETUNE_CP2
    - L2_NeMo_2_NEVA_LOAD_GENERATE
    - L2_NeMo_2_MLLAMA_MOCK_FINETUNE_TP2
    - L2_NEMO_2_MLLAMA_Inference
    - L2_NeMo_2_GPT_Pretraining_no_transformer_engine
    - L2_NeMo_2_GPT_DDP_Param_Parity_check
    - L2_NeMo_2_HF_MODEL_IMPORT
    - L2_NeMo_2_llama3_pretraining_recipe
    - L2_NeMo_2_llama3_fault_tolerance_plugin
    - L2_NeMo_2_llama3_straggler_detection
    - L2_HF_Transformer_PEFT_notebook
    - L2_HF_Transformer_PEFT
    - L2_HF_Transformer_PEFT_nemorun
    - L2_HF_Transformer_PEFT_2gpu
    - L2_HF_Transformer_PEFT_2gpu_FSDP2
    - L2_HF_Transformer_PEFT_2gpu_FSDP2_liger
    - L2_HF_Transformer_PEFT_2gpu_FSDP2_fp8
    - L2_HF_Transformer_PEFT_2gpu_nemorun
    - L2_HF_Transformer_SFT_notebook
    - L2_HF_Transformer_SFT
    - L2_HF_Transformer_SFT_nemorun
    - L2_HF_Transformer_SFT_2gpu
    - L2_HF_Transformer_SFT_2gpu_FSDP2
    - L2_HF_Transformer_SFT_2gpu_FSDP2_fp8
    - L2_VLM_HF_Transformer_PEFT
    - L2_VLM_HF_Transformer_PEFT_FSDP2
    - L2_VLM_HF_Transformer_PEFT_4bit
    - L2_VLM_HF_Transformer_SFT_FSDP2
    - L2_HF_Transformer_SFT_2gpu_nemorun
    - L2_HF_Transformer_SFT_TE_Acceleration
    - L2_HF_Transformer_PT
    - L2_HF_Transformer_PT_nemorun
    - L2_HF_Transformer_PT_2gpu
    - L2_HF_Transformer_PT_2gpu_nemorun
    - L2_HF_Transformer_PT_TE_Acceleration
    - L2_HF_Transformer_SpeechLM_SFT_2gpu
    - L2_NeMo_2_SSM_Pretraining
    - L2_NeMo_2_SSM_Finetuning
    - L2_NeMo_2_Hyena_Conversion_from_HF
    - L2_NeMo_2_Hyena_DDP_Pretraining_Test
    - L2_NeMo_2_T5_Pretraining
    - L2_NeMo_2_T5_Finetuning
    - L2_NeMo_2_T5_LoRA
    - L2_NeMo_2_BERT_Pretraining_Megatron
    - L2_NeMo_2_BERT_Pretraining_HuggingFace
    - L2_NeMo_2_GPT_SFT_TP1PP1_MBS1
    - L2_NeMo_2_GPT_SFT_TP1PP1_MBS2
    - L2_NeMo_2_GPT_SFT_TP1PP2_MBS2
    - L2_NeMo_2_GPT_SFT_TP2PP1_MBS2
    - L2_NeMo_2_GPT_SFT_TP1PP1_MBS1_PACKED
    - L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1
    - L2_NeMo_2_GPT_LoRA_TP1PP1_MBS2
    - L2_NeMo_2_GPT_LoRA_TP1PP2_MBS2
    - L2_NeMo_2_GPT_LoRA_TP2PP1_MBS2
    - L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_Chat
    - L2_NeMo_2_GPT_LoRA_TP1PP1_MBS1_PACKED
    - L2_NeMo_2_GPT_DoRA_TP1PP1_MBS1_PACKED
    - L2_NeMo_2_GPT_CLoRA_TP1PP1_MBS1_PACKED
    - L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2
    - L2_NeMo_2_Mixtral_LoRA_TP1PP1_MBS1
    - L2_NeMo_2_Mixtral_LoRA_TP2PP1_MBS1
    - L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1
    - L2_NeMo_2_Mistral_LoRA_TP2PP1_MBS1
    - L2_NeMo_2_Mistral_LoRA_TP1PP1_MBS1_exclude
    - L2_NeMo_2_Mixtral_LoRA_EP2PP1_MBS2_exclude
    - L2_NEMO_2_LoRA_MERGE
    - L2_NEMO_2_LoRA_Export
    - L2_NEMO_2_LoRA_Inference
    - L2_NeMo_2_Mixtral_Pretraining
    - L2_NeMo_2_Auto_Configurator_llama_TP1_PP1_MBS124
    - L2_NeMo_2_Auto_Configurator_bert_TP1_PP1_MBS124
    - L2_NeMo_2_Auto_Configurator_t5_TP1_PP1_MBS124
    - L2_Speech_to_Text_AED
    - L2_Speech_Estimate_Duration_Bins
    - L2_Speech_Batch_Size_OOMptimizer
    # - Optional_L2_Speech_Batch_Size_OOMptimizer_Canary
    - L2_Speech_Transcription_Canary_Transcribe_Full_Manifest
    - L2_Speech_Transcription_Canary_Transcribe_With_Prompt
    - L2_Speech_Transcription_Canary_Transcribe_Audio_Dir
    - L2_Longform_Speech_Transcription_Canary_Chunked_Infer_from_Audio_Dir
    - L2_Longform_Speech_Transcription_with_TimeStamps_Canary_Chunked_Infer_from_Audio_Dir
    - L2_Longform_Speech_Transcription_with_TimeStamps_Canary_Chunked_Infer_from_Manifest
    - L2_NeMo_2_NeMo_Mcore_Mixtral_bitexact
    - L2_NeMo_2_Automodel_PTQ_trtllm
    - L2_NeMo_2_Automodel_PTQ_hf
    - L2_NeMo_2_PTQ_Llama2_FP8_trtllm
    - L2_NeMo_2_PTQ_Llama2_FP8_nemo
    - L2_NeMo_2_Distill_Llama3_TP1PP2
    - L2_NeMo_2_Prune_Llama_TP1PP2
    - L2_NeMo_2_Export_In_Framework
    - L2_NeMo_2_Export_Deploy_Query_In_Framework
    - L2_NeMo_2_jit_callback
    - L2_NeMo_2_LLAVA_NEXT_MOCK_TRAINING
    - L2_NeMo_2_LLAVA_NEXT_HF_CONVERSION
    - L2_NeMo_2_LLAVA_NEXT_ENERGON_TRAIN
    - L2_NeMo_2_LLAVA_NEXT_ENERGON_PACKED_TRAIN
    - L2_NeMo_2_CLIP_PRETRAIN
    - L2_NeMo_2_CLIP_INFER
    - L2_HF_Transformer_SFT_FSDP2_2gpu
    - L2_HF_Transformer_SFT_2gpu_nemorun_fsdp2
    - L2_NeMo_2_VLLM_EXPORT
    - L2_NeMo_2_EVAL
    - L2_SpeechLM_LoRA_TP1PP1_MBS2
  # always(): run even when needed jobs failed or were skipped.
  # The trigger type lives in github.event_name; github.event is the payload
  # object, so comparing it with 'push' would never exclude push runs.
  if: always() && github.event_name != 'push'
  runs-on: ubuntu-latest
  permissions: write-all
  steps:
- name: Evaluate conclusion
  id: pipeline-conclusion
  if: always()
  run: |
    # FAILED (gates the Slack alert): some needed job reported conclusion
    # 'failure' and the triggering label is 'Run CICD'. Cancellations alone do not count.
    FAILED=${{ contains(needs.*.outputs.conclusion, 'failure') && github.event.label.name == 'Run CICD' }}
    echo "FAILED=$FAILED" >> $GITHUB_OUTPUT
    # SUCCESS: no needed job reported failure, and none was cancelled or skipped.
    SUCCESS=${{ !contains(needs.*.outputs.conclusion, 'failure') && !contains(needs.*.result, 'cancelled') && !contains(needs.*.result, 'skipped') }}
    echo "SUCCESS=$SUCCESS" >> $GITHUB_OUTPUT
- name: Checkout for GH CLI
  # Per the step name, the checkout exists so the gh CLI call below can run.
  uses: actions/checkout@v4
- name: Remove label if not cancelled
  # Drops the 'Run CICD' label once the run finished without cancellation.
  # Restricted to PRs whose head repository is this repository.
  env:
    PR_NUMBER: ${{ github.event.number }}
    GH_TOKEN: ${{ github.token }}
  if: >-
    ${{ !contains(needs.*.result, 'cancelled')
    && github.event.label.name == 'Run CICD'
    && github.event.pull_request.head.repo.full_name == github.repository }}
  run: gh pr edit "$PR_NUMBER" --remove-label "Run CICD"
# This job needs every test job, so its outcome blocks or unblocks a PR
# based on all tests passing.
- name: Pipeline successful, set exit code to 0
  if: always() && steps.pipeline-conclusion.outputs.SUCCESS == 'true'
  run: exit 0
- name: Pipeline successful, add PR comment
  # Posts a comment on the PR linking to this run. Only for pull_request
  # events, when SUCCESS is 'true' and env.SLACK_WEBHOOK is non-empty.
  uses: peter-evans/create-or-update-comment@v4
  if: >-
    ${{ always()
    && steps.pipeline-conclusion.outputs.SUCCESS == 'true'
    && github.event_name == 'pull_request'
    && env.SLACK_WEBHOOK != '' }}
  env:
    RUN_ID: ${{ github.run_id }}
    REPOSITORY: ${{ github.repository }}
    SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
  with:
    issue-number: ${{ github.event.number }}
    body: |
      [🤖]: Hi @${{ github.event.pull_request.user.login }} 👋,
      We wanted to let you know that a [CICD pipeline](https://github.com/${{ env.REPOSITORY }}/actions/runs/${{ env.RUN_ID }}) for this PR just finished successfully
      So it might be time to merge this PR or get some approvals
      I'm just a bot so I'll leave it you what to do next.
      //cc @pablo-garay @ko3n1g
- name: "Pipeline not successful and not cancelled: Send Slack alert & create step summary"
  # Runs when the conclusion step set FAILED to 'true' and env.SLACK_WEBHOOK is non-empty.
  # Lists every needed job whose conclusion output is 'failure' in the step
  # summary and posts one Slack section per failed job to SLACK_WEBHOOK.
  if: ${{ always() && steps.pipeline-conclusion.outputs.FAILED == 'true' && env.SLACK_WEBHOOK != '' }}
  env:
    SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
    SLACK_WEBHOOK_ADMIN: <!subteam^${{ secrets.SLACK_WEBHOOK_ADMIN }}>
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    GITHUB_ACTOR: ${{ github.actor }}
    BRANCH: ${{ github.head_ref || github.ref_name }}
    REPOSITORY: ${{ github.repository }}
    RUN_ID: ${{ github.run_id }}
    PR_NUMBER: ${{ github.event.number }}
    SERVER_URL: ${{ github.server_url }}
  run: |
    set -x

    # Fetch the PR so its URL and title can be shown in the Slack message.
    PR_INFO=$(curl -L \
      -H "Accept: application/vnd.github+json" \
      -H "Authorization: Bearer $GITHUB_TOKEN" \
      -H "X-GitHub-Api-Version: 2022-11-28" \
      https://api.github.com/repos/$REPOSITORY/pulls/$PR_NUMBER
    )
    # Quoted so the JSON is not word-split or glob-expanded before jq sees it.
    PR_URL=$(echo -E "$PR_INFO" | jq '.html_url' | tr -d '"')
    PR_TITLE=$(echo -E "$PR_INFO" | jq '.title' | tr -d '"')
    PIPELINE_URL=$SERVER_URL/$REPOSITORY/actions/runs/$RUN_ID

    BASE_MESSAGE='
      {
        "blocks": [
          {
            "type": "section",
            "text": {
              "type": "mrkdwn",
              "text": "🚨 *CI/CD failure at <'$PIPELINE_URL'|NeMo CI>*."
            }
          }
        ]
      }
    '

    # Since this workflow contains more than 100 jobs, we need to iterate over job pages
    JOBS='[]'
    PAGE=1
    while : ; do
      JOBS_URL="https://api.github.com/repos/$REPOSITORY/actions/runs/$RUN_ID/jobs?page=$PAGE&per_page=100"
      RESPONSE=$(curl -s -H "Authorization: token $GITHUB_TOKEN" "$JOBS_URL" | jq '.jobs')
      # printf (not `echo -e`) so backslash escapes inside the JSON stay intact.
      JOBS=$(printf '%s\n%s\n' "$JOBS" "$RESPONSE" | jq -cs 'add')
      # A page with fewer than 100 entries is the last one.
      if [[ $(echo "$RESPONSE" | jq 'length') -lt 100 ]]; then
        break
      else
        PAGE=$(( PAGE + 1))
      fi
    done

    SUMMARY="[]"
    echo "Failed jobs: " | tee -a "$GITHUB_STEP_SUMMARY"
    while IFS= read -r JOB; do
      # The jobs API lists each job under "<job key> / main"; use that name to find its id.
      JOB_NAME="$(echo "$JOB" | jq '.key' | tr -d '"') / main"
      JOB_ID=$(echo "$JOBS" | jq --arg job_name "$JOB_NAME" '.[] | select(.name == $job_name) | .id')
      JOB_URL="https://github.com/$REPOSITORY/actions/runs/$RUN_ID/job/$JOB_ID"
      echo "* [$JOB_NAME]($JOB_URL)" | tee -a "$GITHUB_STEP_SUMMARY"

      # The job's `log` output is base64-encoded; decode it for the message.
      LOGS=$(echo "$JOB" | yq '(.value.outputs.log | @base64d)' | tr -d '"')
      # Wrap in a code fence only when there is log text. A byte count of
      # `echo` output is never zero, so test the string itself.
      LOGS=$([[ -n "$LOGS" ]] && echo -E "\`\`\`\n$LOGS\n\`\`\`" || echo "")
      # Mention the admin group when the job flagged a potential infra failure.
      LOGS=$([[ $(echo "$JOB" | yq '.value.outputs.potential_infra_failure') == "true" ]] && echo -E "$LOGS\n\ncc: $SLACK_WEBHOOK_ADMIN" || echo -E "$LOGS")

      # `echo -e` here turns the literal \n sequences built above into newlines.
      SUMMARY=$(echo "$SUMMARY" | jq \
        --arg pr "<$PR_URL|$PR_TITLE>" \
        --arg job "<$JOB_URL|$JOB_NAME>" \
        --arg logs "$(echo -e "$LOGS")" \
        --arg author "<https://github.com/$GITHUB_ACTOR|$GITHUB_ACTOR>" \
        --arg branch "<https://github.com/$REPOSITORY/tree/$BRANCH|$BRANCH>" \
        '. += [
          {
            "type": "section",
            "text": {
              "type": "mrkdwn",
              "text": (
                "PR: " + $pr
                + "\nJob: " + $job
                + "\nAuthor: " + $author
                + "\nBranch: " + $branch
                + "\nLogs:" + $logs
              )
            }
          }
        ]')
    # NOTE(review): toJSON(needs) is pasted into the script inside single quotes;
    # a job output containing a single quote would break this command.
    done <<<$(echo '${{ toJSON(needs) }}' | jq -c 'to_entries | .[] | select(.value.outputs.conclusion == "failure")')

    MESSAGE=$(echo "$BASE_MESSAGE" | jq -c --argjson summary "$SUMMARY" '.blocks += $summary')
    curl -X POST -H "Content-type: application/json" --data "$MESSAGE" "$SLACK_WEBHOOK"
- name: Pipeline not successful, set exit code to 1
  # Fails this job whenever the conclusion step reported SUCCESS as 'false'.
  if: always() && steps.pipeline-conclusion.outputs.SUCCESS == 'false'
  run: exit 1
Coverage:
  # Runs after Nemo_CICD_Test, once per matrix flag: downloads the matching
  # coverage artifacts, combines them, reports to Codecov and re-uploads the
  # combined .coverage file as an artifact.
  needs:
    - Nemo_CICD_Test
  runs-on: ubuntu-latest
  strategy:
    matrix:
      flag:
        - unit-test
        - e2e
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Download coverage reports of current branch
      uses: actions/download-artifact@v4
      with:
        pattern: coverage-${{ matrix.flag }}-*

    - name: Get total coverage of current branch
      if: always()
      shell: bash -x -e -u -o pipefail {0}
      run: |
        pip install coverage

        ls -al .
        ls -al coverage-*/
        # Merge every downloaded .coverage file; --keep leaves the inputs in place.
        coverage combine --keep $(ls coverage-*/.coverage)
        coverage report -i
        # Remove the downloaded artifact directories once combined.
        rm -rf coverage-*
        ls -al

    - name: Upload coverage reports to Codecov
      uses: codecov/codecov-action@v5
      with:
        flags: ${{ matrix.flag }}
        token: ${{ secrets.CODECOV_TOKEN }}
        verbose: true

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: coverage-${{ matrix.flag }}-aggregated
        # .coverage is a dotfile, so hidden files must be included explicitly.
        include-hidden-files: true
        path: |
          .coverage