2 changes: 1 addition & 1 deletion 3rdparty/Megatron-LM
Submodule Megatron-LM updated 134 files
143 changes: 143 additions & 0 deletions GIT_COMMIT_SKILLS.md
@@ -0,0 +1,143 @@
# Git Commit Workflow

## Standard Workflow for Committing Changes

### 1. Check Current Branch
```bash
git branch --show-current
```

### 2. Create Feature Branch (REQUIRED if on main)
**Important**: You must create a feature branch before committing if you're on `main`. Never commit directly to `main`.

Check your current branch:
```bash
CURRENT_BRANCH=$(git branch --show-current)
if [ "$CURRENT_BRANCH" = "main" ]; then
  git checkout -b feature/your-feature-name
fi
```

Or manually:
```bash
git checkout -b feature/your-feature-name
```

### 3. Stage Your Changes
```bash
git add <file1> <file2> ...
# Or stage all modified files:
git add -u
```

### 4. Run Pre-commit Hooks
```bash
export PATH="/Users/yuya/Library/Python/3.9/bin:$PATH"
pre-commit run
```

> **Review comment on lines +35 to +36 (Contributor):** ⚠️ Potential issue | 🟠 Major
>
> Hardcoded user-specific PATH leaks a local username and won't work for other contributors. Lines 35 and 79 both contain `/Users/yuya/Library/Python/3.9/bin`, a macOS-specific path tied to a single developer's machine. It won't work for anyone else and inadvertently exposes a username. If pre-commit is installed properly (e.g., via uv or pip), it should already be on `$PATH`; remove the `export PATH=...` line or replace it with a generic instruction. Proposed fix (the same change applies in the example section at line 79):
>
> ```diff
> -export PATH="/Users/yuya/Library/Python/3.9/bin:$PATH"
>  pre-commit run
> ```
This will run all pre-commit hooks (formatting, linting, etc.) before committing.

### 5. Commit with Sign-off
```bash
git commit -s -m "[module] type: Your descriptive commit message"
```
The `-s` flag adds a Signed-off-by line to the commit message. See [Commit and PR Title Format](#commit-and-pr-title-format) for proper formatting.
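To see exactly what the `-s` flag produces, here is a self-contained check in a throwaway repository (the user name and email are placeholder values, not project conventions):

```shell
# Demonstrate the Signed-off-by trailer in a disposable repo.
# "Example Dev" / dev@example.com are placeholder identity values.
set -e
tmp=$(mktemp -d)
cd "$tmp"
git init -q
git config user.name "Example Dev"
git config user.email "dev@example.com"
echo "hello" > file.txt
git add file.txt
git commit -q -s -m "[doc] chore: demo sign-off"
# Print the commit message; its final line is the DCO trailer.
git log -1 --format=%B
```

The last command shows the message body ending with `Signed-off-by: Example Dev <dev@example.com>`, which git builds from your configured `user.name` and `user.email`.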

### 6. Push to Remote
```bash
git push -u origin feature/your-feature-name
```
The `-u` flag sets the upstream the first time you push a new branch; after that, plain `git push` suffices.

### 7. Check for Existing PR
```bash
gh pr list --head <your-branch-name> --json number --jq '.[0].number'
```

### 8. Trigger CI Testing
If a PR exists, comment on it to trigger CI:
```bash
COMMIT_HASH=$(git rev-parse HEAD)
gh pr comment <PR_NUMBER> --body "/ok to test $COMMIT_HASH"
```

## Example Workflow

```bash
# 1. Check branch
CURRENT_BRANCH=$(git branch --show-current)
echo "Current branch: $CURRENT_BRANCH"

# 2. Create feature branch if on main (REQUIRED)
if [ "$CURRENT_BRANCH" = "main" ]; then
  git checkout -b feature/your-feature-name
fi

# 3. Stage changes
git add tests/unit_tests/models/gemma_vl/test_gemma3_vl_bridge.py

# 4. Run pre-commit
export PATH="/Users/yuya/Library/Python/3.9/bin:$PATH"
pre-commit run

# 5. Commit with sign-off
git commit -s -m "[test] fix: Fix gemma3_vl bridge test for image_token_id default"

# 6. Push (set the upstream on the first push of a new branch)
git push -u origin "$(git branch --show-current)"

# 7. Check for PR and trigger CI
PR_NUMBER=$(gh pr list --head "$(git branch --show-current)" --json number --jq '.[0].number')
COMMIT_HASH=$(git rev-parse HEAD)
gh pr comment $PR_NUMBER --body "/ok to test $COMMIT_HASH"
```

## Commit and PR Title Format

Format your commit messages and PR titles as:

```text
[{modules}] {type}: {description}
```

### Modules
Use the most relevant ones, separate multiple with `,`:
- `model` - Model implementations and bridges
- `recipe` - Training recipes
- `training` - Training loop and utilities
- `data` - Data loading and processing
- `ckpt` - Checkpoint conversion and saving
- `peft` - Parameter-efficient fine-tuning (LoRA, etc.)
- `perf` - Performance optimizations
- `ci` - CI/CD configuration
- `doc` - Documentation
- `test` - Tests
- `build` - Build system and dependencies
- `misc` - Other changes

### Types
- `feat` - New feature
- `fix` - Bug fix
- `refactor` - Code refactoring without changing functionality
- `chore` - Maintenance tasks
- `test` - Adding or updating tests

### Breaking Changes
If your PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title.

### Examples
```text
[model] feat: Add Qwen3 model bridge
[recipe, doc] feat: Add Llama 3.1 70B recipe with documentation
[ckpt] fix: Handle missing keys in HF checkpoint conversion
[BREAKING][training] refactor: Change optimizer config structure
[ci, build] chore: Update ruff version
[test] fix: Fix gemma3_vl bridge test for image_token_id default
```
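The convention can also be checked mechanically. The helper below is a sketch (the function name and regex are ours, not part of the repository's tooling); it accepts the documented types, comma-separated lowercase modules, and the optional `[BREAKING]` prefix:

```shell
# Hypothetical lint helper for the [{modules}] {type}: {description} convention.
# Returns 0 for a conforming title, non-zero otherwise.
check_title() {
  echo "$1" | grep -Eq \
    '^(\[BREAKING\])?\[[a-z]+(, ?[a-z]+)*\] (feat|fix|refactor|chore|test): .+'
}

check_title "[model] feat: Add Qwen3 model bridge" && echo "ok"
check_title "update stuff" || echo "rejected"
```

Running this prints `ok` followed by `rejected`. A check like this could be wired into a local `commit-msg` hook if the project ever wants to enforce the format.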

## Notes

- **Never commit directly to `main`** - Always create a feature branch first
- Always run `pre-commit run` before committing to catch formatting/linting issues early
- Use descriptive commit messages following the format above
- The `-s` flag is required for DCO (Developer Certificate of Origin) compliance
- If pre-commit modifies files, you may need to stage them again before committing
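The last note above can be sketched as a small helper. The function name is ours, not project tooling, and it assumes the hooks are idempotent (a second pass over already-fixed files makes no further changes):

```shell
# Sketch of "stage again after hooks modify files":
# if the first pre-commit pass fails because hooks auto-fixed files,
# stage those fixes and run the hooks once more.
run_hooks_and_restage() {
  if ! pre-commit run; then
    git add -u       # stage the files the hooks just modified
    pre-commit run   # second pass should now be clean
  fi
}
```

Call `run_hooks_and_restage` before `git commit`; if the second pass still fails, the hooks found issues that need a manual fix.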
Empty file added REMOTE_DEBUG_SKILLS.md
Empty file.
12 changes: 9 additions & 3 deletions examples/rl/rlhf_with_bridge.py
@@ -33,16 +33,15 @@
 
 Run (single GPU)
 ```bash
-export CUDA_VISIBLE_DEVICES=0
-python examples/rl/rlhf_with_bridge.py \
+uv run python examples/rl/rlhf_with_bridge.py \
     --hf-policy-model Qwen/Qwen3-0.6B \
     --hf-reward-model distilbert-base-uncased-finetuned-sst-2-english \
     --train-iters 5 --mbs 1 --gbs 1 --seq-length 256 --max-new-tokens 32
 ```
 
 Run (multi-GPU)
 ```bash
-torchrun --nproc_per_node=2 examples/rl/rlhf_with_bridge.py \
+uv run python -m torch.distributed.run --nproc_per_node=2 examples/rl/rlhf_with_bridge.py \
     --hf-policy-model Qwen/Qwen3-0.6B \
     --hf-reward-model distilbert-base-uncased-finetuned-sst-2-english \
     --train-iters 20 --mbs 1 --gbs 2 --seq-length 256 --max-new-tokens 32
@@ -62,6 +61,7 @@
 import torch
 import torch.nn.functional as F
 from megatron.core.pipeline_parallel import get_forward_backward_func
+from megatron.core.process_groups_config import ProcessGroupCollection
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 from megatron.bridge import AutoBridge
@@ -94,6 +94,7 @@ class Args:
     global_batch_size: int
     train_iters: int
     seq_length: int
+    trust_remote_code: bool = False
 
 
 def build_config(provider, args: Args) -> ConfigContainer:
@@ -233,6 +234,7 @@ def main() -> None:
         global_batch_size=ns.gbs,
         train_iters=ns.train_iters,
         seq_length=ns.seq_length,
+        trust_remote_code=ns.trust_remote_code,
     )
 
     # Resolve per-rank device up front for multi-GPU runs
@@ -296,13 +298,17 @@ def main() -> None:
     initialize_megatron(cfg=cfg)
     set_jit_fusion_options(cfg.model, cfg.train.micro_batch_size)
 
+    # Get process group collection after initialization
+    pg_collection = ProcessGroupCollection.use_mpu_process_groups()
+
     # Build model + optimizer + scheduler
     model_list = get_model(
         cfg.model,
         cfg.ddp,
         overlap_param_gather_with_optimizer_step=False,
         use_torch_fsdp2=cfg.dist.use_torch_fsdp2,
         data_parallel_random_init=cfg.rng.data_parallel_random_init,
+        pg_collection=pg_collection,
     )
     model = model_list[0]
     optimizer, scheduler = setup_optimizer(
2 changes: 1 addition & 1 deletion tutorials/training/reduced_precision_training.ipynb
@@ -119,7 +119,7 @@
 "# - train_text_document.bin (document/sequence data)\n",
 "# - train_text_document.idx (document/sequence metadata)\n",
 "\n",
-"MEGATRON_LM_PATH=/opt/megatron-lm/\n",
+"MEGATRON_LM_PATH=/opt/Megatron-Bridge/3rdparty/Megatron-LM/\n",
 "\n",
 "echo \"Tokenizing training data...\"\n",
 "python3 $MEGATRON_LM_PATH/tools/preprocess_data.py \\\n",