diff --git a/.github/workflows/renovate.yml b/.github/workflows/renovate.yml index 85dd865ede..418ee7a1e8 100644 --- a/.github/workflows/renovate.yml +++ b/.github/workflows/renovate.yml @@ -47,19 +47,19 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 with: submodules: 'recursive' - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: - python-version: '3.12' + python-version: '3.14.3' - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7 with: - version: "0.9.1" + version: "0.10.12" enable-cache: true prune-cache: false @@ -128,13 +128,13 @@ jobs: - name: Get GitHub App token id: get-app-token continue-on-error: true - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3 with: app-id: ${{ secrets.RENOVATE_APP_ID }} private-key: ${{ secrets.RENOVATE_APP_PRIVATE_KEY }} - name: Run Renovate - uses: renovatebot/github-action@v44.0.3 + uses: renovatebot/github-action@68a3ea99af6ad249940b5a9fdf44fc6d7f14378b # v46.1.6 with: configurationFile: .github/renovate.json # Use GitHub App token if available, otherwise fall back to PAT diff --git a/3rdparty/Automodel-workspace/Automodel b/3rdparty/Automodel-workspace/Automodel index 4cb1d5db2c..0ce8695236 160000 --- a/3rdparty/Automodel-workspace/Automodel +++ b/3rdparty/Automodel-workspace/Automodel @@ -1 +1 @@ -Subproject commit 4cb1d5db2c7f91c19104ee410946f892f007aa09 +Subproject commit 0ce8695236b698ffb2680b235a0e8579f288bd68 diff --git a/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge b/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge index 8aa287df3c..63af02a820 160000 --- a/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge +++ b/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge @@ 
-1 +1 @@ -Subproject commit 8aa287df3ca6833c78733460f0c0f0bcfb79f5de +Subproject commit 63af02a8206f1e3c5496d62e32fd68de3fc14be4 diff --git a/3rdparty/Megatron-Bridge-workspace/setup.py b/3rdparty/Megatron-Bridge-workspace/setup.py index 05d3f32e83..2890846f6f 100644 --- a/3rdparty/Megatron-Bridge-workspace/setup.py +++ b/3rdparty/Megatron-Bridge-workspace/setup.py @@ -26,23 +26,36 @@ bridge_package_name = "megatron.bridge" CACHED_DEPENDENCIES = [ - "datasets", + "transformers>=5.0.0,<=5.3.0", + "peft>=0.18.1", + "datasets>=2.20.0", + "accelerate", + "diffusers>=0.36.0", + "einops", + "imageio", + "imageio-ffmpeg", "omegaconf>=2.3.0", "tensorboard>=2.19.0", "typing-extensions", "rich", - "wandb>=0.19.10", + "wandb>=0.25.0", "six>=1.17.0", "regex>=2024.11.6", "pyyaml>=6.0.2", "tqdm>=4.67.1", "hydra-core>1.3,<=1.3.2", - "megatron-core[dev,mlm]>=0.15.0a0,<0.16.0", + "megatron-core[dev,mlm]", "qwen-vl-utils", - "transformer-engine[pytorch]>=2.9.0a0,<2.10.0", + "transformer-engine[pytorch,core_cu13]", "mamba-ssm", - "nvidia-resiliency-ext", + "nvidia-resiliency-ext~=0.5.0", "causal-conv1d", + "flash-linear-attention", + "timm", + "open-clip-torch>=3.2.0", + "mlflow>=3.5.0", + "comet-ml>=3.50.0", + "torch>=2.6.0", ] # If the bridge source exists, compare cached dependencies with the submodule's pyproject diff --git a/3rdparty/Megatron-LM-workspace/Megatron-LM b/3rdparty/Megatron-LM-workspace/Megatron-LM index 76065f17e1..9fc9377109 160000 --- a/3rdparty/Megatron-LM-workspace/Megatron-LM +++ b/3rdparty/Megatron-LM-workspace/Megatron-LM @@ -1 +1 @@ -Subproject commit 76065f17e1e1e2850d1e9009ae5f601007aeeeb3 +Subproject commit 9fc9377109abd18b2a97f897e0a3565ad40a1a66 diff --git a/github/actions/test-template/action.yml b/github/actions/test-template/action.yml index ab57aebc01..0c7306f58f 100644 --- a/github/actions/test-template/action.yml +++ b/github/actions/test-template/action.yml @@ -70,7 +70,7 @@ runs: - name: Azure Login if: ${{ 
inputs.has-azure-credentials == 'true' }} - uses: azure/login@v2 + uses: azure/login@532459ea530d8321f2fb9bb10d1e0bcf23869a43 # v3 with: client-id: ${{ inputs.azure-client-id }} tenant-id: ${{ inputs.azure-tenant-id }} @@ -153,7 +153,7 @@ runs: echo "id=$(uuidgen)" >> "$GITHUB_OUTPUT" - name: Checkout NeMo - uses: actions/checkout@v2 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 env: DIR: ${{ github.run_id }} with: @@ -211,7 +211,7 @@ runs: exit $EXIT_CODE - name: Upload artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 if: ${{ steps.check.outputs.coverage_report != 'none' }} with: name: ${{ steps.check.outputs.coverage_report }} diff --git a/pyproject.toml b/pyproject.toml index 743c7dea0b..d843928f7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,8 +57,8 @@ automodel = [ # Flash-attn version should be selected to satisfy both TE + vLLM requirements (xformers in particular) # https://github.com/NVIDIA/TransformerEngine/blob/v2.3/transformer_engine/pytorch/attention/dot_product_attention/utils.py#L108 # https://github.com/facebookresearch/xformers/blob/8354497deb2c04c67fbb2e2ad911e86530da0e90/xformers/ops/fmha/flash.py#L76 - "vllm==0.11.0", # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/811 resolved - "flash-attn==2.8.1", + "vllm==0.18.0", # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/811 resolved + "flash-attn==2.8.3", "mamba-ssm", "causal-conv1d", ] @@ -69,10 +69,10 @@ vllm = [ # sudo apt-get update # sudo apt-get install libibverbs-dev "deep_ep @ git+https://github.com/deepseek-ai/DeepEP.git@e3908bf5bd0cc6265bcb225d15cd8c996d4759ef", - "vllm==0.11.0", + "vllm==0.18.0", "num2words>=0.5.14", # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/501 resolved - "flash-attn==2.8.1", + "flash-attn==2.8.3", # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/501 resolved "mamba-ssm", # Remove this once 
https://github.com/NVIDIA-NeMo/RL/issues/501 resolved @@ -88,15 +88,15 @@ mcore = [ # This dependency also needs to be compatible with the spec in Megatron-Bridge/pyproject.toml. # It is specified here since we don't directly use Megatron-Bridge/pyproject.toml, but a proxy setup.py+pyproject.toml combo # outside to allow "optionally" installing the megatron path. It's simpler to deal with transformer-engine here in the NeMo RL pyproject.toml - "transformer-engine[pytorch]==2.8.0", + "transformer-engine[pytorch]==2.12.0", "megatron-core", "megatron-bridge", # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/501 resolved - "vllm==0.11.0", + "vllm==0.18.0", # Flash-attn version should be selected to satisfy both TE + vLLM requirements (xformers in particular) # https://github.com/NVIDIA/TransformerEngine/blob/v2.3/transformer_engine/pytorch/attention/dot_product_attention/utils.py#L108 # https://github.com/facebookresearch/xformers/blob/8354497deb2c04c67fbb2e2ad911e86530da0e90/xformers/ops/fmha/flash.py#L76 - "flash-attn==2.8.1", + "flash-attn==2.8.3", ] penguin = ["penguin"]